diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 263036fbf61..844e86a7787 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -58,15 +58,13 @@ jobs: - uses: actions/checkout@v6 with: persist-credentials: false - - uses: dtolnay/rust-toolchain@nightly - ## note: requires 'nightly' toolchain b/c `cargo-udeps` uses the `rustc` '-Z save-analysis' option - ## * ... ref: - uses: taiki-e/install-action@cargo-udeps - uses: Swatinem/rust-cache@v2 - name: Initialize workflow variables id: vars shell: bash run: | + echo "RUSTC_BOOTSTRAP=1" >> "${GITHUB_ENV}" # Use -Z ## VARs setup outputs() { step_id="${{ github.action }}"; for var in "$@" ; do echo steps.${step_id}.outputs.${var}="${!var}"; echo "${var}=${!var}" >> $GITHUB_OUTPUT; done; } # failure mode @@ -88,7 +86,7 @@ jobs: fault_type="${{ steps.vars.outputs.FAULT_TYPE }}" fault_prefix=$(echo "$fault_type" | tr '[:lower:]' '[:upper:]') # - cargo +nightly udeps ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --all-targets &> udeps.log || cat udeps.log + cargo udeps ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} --all-targets &> udeps.log || cat udeps.log grep --ignore-case "all deps seem to have been used" udeps.log || { printf "%s\n" "::${fault_type} ::${fault_prefix}: \`cargo udeps\`: style violation (unused dependency found)" ; fault=true ; } if [ -n "${{ steps.vars.outputs.FAIL_ON_FAULT }}" ] && [ -n "$fault" ]; then exit 1 ; fi @@ -98,6 +96,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -163,6 +162,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: matrix: job: @@ -269,6 +269,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -405,6 +406,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -444,6 +446,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -484,6 +487,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -610,6 +614,7 @@ jobs: DOCKER_OPTS: '--volume /etc/passwd:/etc/passwd --volume /etc/group:/etc/group' SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -848,16 +853,8 @@ jobs: run: | ## Test ${{ steps.vars.outputs.CARGO_CMD }} ${{ steps.vars.outputs.CARGO_CMD_OPTIONS }} test --target=${{ matrix.job.target }} \ - ${{ steps.vars.outputs.CARGO_TEST_OPTIONS}} ${{ matrix.job.cargo-options }} ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} ${{ steps.vars.outputs.CARGO_DEFAULT_FEATURES_OPTION }} - env: - RUST_BACKTRACE: "1" - - name: Test individual utilities - if: matrix.job.skip-tests != true - shell: bash - run: | - ## Test individual utilities - ${{ steps.vars.outputs.CARGO_CMD }} ${{ steps.vars.outputs.CARGO_CMD_OPTIONS }} test --target=${{ matrix.job.target }} \ - ${{ matrix.job.cargo-options }} ${{ steps.dep_vars.outputs.CARGO_UTILITY_LIST_OPTIONS }} + ${{ steps.vars.outputs.CARGO_TEST_OPTIONS}} ${{ matrix.job.cargo-options }} ${{ steps.vars.outputs.CARGO_FEATURES_OPTION }} ${{ steps.vars.outputs.CARGO_DEFAULT_FEATURES_OPTION }} \ + ${{ steps.dep_vars.outputs.CARGO_UTILITY_LIST_OPTIONS }} -p coreutils env: RUST_BACKTRACE: "1" - name: Archive executable artifacts @@ -928,6 +925,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -1009,6 +1007,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -1029,7 +1028,6 @@ jobs: - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ env.RUST_MIN_SRV }} - components: rustfmt - uses: Swatinem/rust-cache@v2 - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.9 @@ -1102,6 +1100,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -1226,7 +1225,7 @@ jobs: fail_ci_if_error: false test_separately: - name: Separate Builds + name: Separate Builds (individual and coreutils)# duplicated with other CI, but has better appearance runs-on: ${{ matrix.job.os }} strategy: fail-fast: false @@ -1236,33 +1235,6 @@ jobs: - { os: macos-latest , features: feat_os_macos } - { os: windows-latest , features: feat_os_windows } steps: - - uses: actions/checkout@v6 - with: - persist-credentials: false - - uses: dtolnay/rust-toolchain@stable - - uses: Swatinem/rust-cache@v2 - - name: build and test all programs individually - shell: bash - run: | - CARGO_FEATURES_OPTION='--features=${{ matrix.job.features }}' ; - for f in $(util/show-utils.sh ${CARGO_FEATURES_OPTION}) - do - echo "Building and testing $f" - cargo test -p "uu_$f" - done - - test_all_features: - name: Test all features separately - needs: [ min_version, deps ] - runs-on: ${{ matrix.job.os }} - strategy: - fail-fast: false - matrix: - job: - - { os: ubuntu-latest , features: feat_os_unix } - - { os: macos-latest , features: feat_os_macos } - # - { os: windows-latest , features: feat_os_windows } https://github.com/uutils/coreutils/issues/7044 - steps: - uses: actions/checkout@v6 with: persist-credentials: false @@ -1270,14 +1242,14 @@ jobs: run: sudo rm -rf /usr/share/dotnet /usr/local/lib/android & - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - - name: build and test all features individually + - name: build and test all programs individually shell: bash run: | CARGO_FEATURES_OPTION='--features=${{ matrix.job.features }}' ; for f in $(util/show-utils.sh ${CARGO_FEATURES_OPTION}) do - echo "Running tests with --features=$f and --no-default-features" - cargo test --features=$f --no-default-features + echo "Building and testing $f" + cargo test -p "uu_$f" -p coreutils --features=$f --no-default-features done test_selinux: diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml index 0f8ed7fd167..86b9cfedd79 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -44,9 +44,6 @@ jobs: with: path: 'uutils' persist-credentials: false - - uses: dtolnay/rust-toolchain@master - with: - toolchain: stable - uses: Swatinem/rust-cache@v2 with: workspaces: "./uutils -> target" @@ -207,12 +204,6 @@ jobs: with: path: 'uutils' persist-credentials: false - - uses: dtolnay/rust-toolchain@master - with: - toolchain: stable - - uses: Swatinem/rust-cache@v2 - with: - workspaces: "./uutils -> target" - name: Checkout code (GNU coreutils) run: (mkdir -p gnu && cd gnu && bash ../uutils/util/fetch-gnu.sh) @@ -325,9 +316,6 @@ jobs: with: path: 'uutils' persist-credentials: false - - uses: dtolnay/rust-toolchain@master - with: - toolchain: stable - uses: Swatinem/rust-cache@v2 with: workspaces: "./uutils -> target" diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 0f4b5881a8d..eb5392a13f4 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -20,9 +20,13 @@ jobs: benchmarks: name: Run ${{ matrix.type }} benchmarks for ${{ matrix.package }} (CodSpeed) runs-on: ubuntu-latest + env: + RUSTC_WRAPPER: sccache + CARGO_INCREMENTAL: 0 + SCCACHE_GHA_ENABLED: "true" strategy: matrix: - type: [performance] # , memory] # memory profile disabled due to variance + type: [simulation] # , memory] # memory profile disabled due to variance package: [ uu_base64, uu_cksum, @@ -78,18 +82,14 @@ jobs: shell: bash run: | echo "Building ${{ matrix.type }} benchmarks for ${{ matrix.package }}" - if [ "${{ matrix.type }}" = "memory" ]; then - cargo codspeed build -m analysis -p ${{ matrix.package }} - else - cargo codspeed build -p ${{ matrix.package }} - fi + cargo codspeed build -m ${{ matrix.type }} -p ${{ matrix.package }} - name: Run ${{ matrix.type }} benchmarks for ${{ matrix.package }} uses: CodSpeedHQ/action@v4 env: CODSPEED_LOG: debug with: - mode: ${{ matrix.type == 'memory' && 'memory' || 'simulation' }} + mode: ${{ matrix.type }} run: | echo "Running ${{ matrix.type }} benchmarks for ${{ matrix.package }}" cargo codspeed run -p ${{ matrix.package }} > /dev/null diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index dcd81133ce4..c902af1528d 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -1,6 +1,6 @@ name: Code Quality -# spell-checker:ignore (people) reactivecircus Swatinem dtolnay juliangruber pell taplo +# spell-checker:ignore (people) dtolnay juliangruber pell reactivecircus Swatinem taiki-e taplo # spell-checker:ignore (misc) TERMUX noaudio pkill swiftshader esac sccache pcoreutils shopt subshell dequote libsystemd on: @@ -74,6 +74,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -147,6 +148,12 @@ jobs: CARGO_UTILITY_LIST_OPTIONS="$(for u in ${UTILITY_LIST}; do echo -n "-puu_${u} "; done;)" S=$(cargo clippy --all-targets $extra --tests --benches -pcoreutils ${CARGO_UTILITY_LIST_OPTIONS} -- -D warnings 2>&1) && printf "%s\n" "$S" || { printf "%s\n" "$S" ; printf "%s" "$S" | sed -E -n -e '/^error:/{' -e "N; s/^error:[[:space:]]+(.*)\\n[[:space:]]+-->[[:space:]]+(.*):([0-9]+):([0-9]+).*$/::${fault_type} file=\2,line=\3,col=\4::${fault_prefix}: \`cargo clippy\`: \1 (file:'\2', line:\3)/p;" -e '}' ; fault=true ; } if [ -n "${{ steps.vars.outputs.FAIL_ON_FAULT }}" ] && [ -n "$fault" ]; then exit 1 ; fi + - name: "cargo clippy on fuzz dir" + if: runner.os != 'Windows' + shell: bash + run: | + cd fuzz + cargo clippy --workspace --all-targets --all-features -- -D warnings style_spellcheck: name: Style/spelling @@ -198,8 +205,13 @@ jobs: with: persist-credentials: false + - name: Install taplo-cli + uses: taiki-e/install-action@v2 + with: + tool: taplo-cli + - name: Check - run: npx --yes @taplo/cli fmt --check + run: taplo fmt --check --diff python: name: Style/Python diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index 549f2ba85fe..4b6dcf04341 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -1,6 +1,6 @@ name: FreeBSD -# spell-checker:ignore sshfs usesh vmactions taiki Swatinem esac fdescfs fdesc sccache nextest copyback logind +# spell-checker:ignore sshfs usesh vmactions taiki Swatinem esac fdescfs fdesc nextest copyback logind env: # * style job configuration @@ -30,18 +30,10 @@ jobs: matrix: job: - { os: ubuntu-24.04 , features: unix } - env: - SCCACHE_GHA_ENABLED: "true" - RUSTC_WRAPPER: "sccache" steps: - uses: actions/checkout@v6 with: persist-credentials: false - - uses: Swatinem/rust-cache@v2 - - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.9 - with: - disable_annotations: true - name: Prepare, build and test uses: vmactions/freebsd-vm@v1.2.9 with: @@ -101,6 +93,7 @@ jobs: # To ensure that files are cleaned up, we don't want to exit on error set +e unset FAULT + export CARGO_INCREMENTAL=0 ## cargo fmt testing echo "## cargo fmt testing" # * convert any errors/warnings to GHA UI annotations; ref: @@ -127,19 +120,12 @@ jobs: - { os: ubuntu-24.04 , features: unix } env: mem: 4096 - SCCACHE_GHA_ENABLED: "true" - RUSTC_WRAPPER: "sccache" steps: - uses: actions/checkout@v6 with: persist-credentials: false - name: Avoid no space left on device (Ubuntu runner) run: sudo rm -rf /usr/share/dotnet /usr/local/lib/android & - - uses: Swatinem/rust-cache@v2 - - name: Run sccache-cache - uses: mozilla-actions/sccache-action@v0.0.9 - with: - disable_annotations: true - name: Prepare, build and test uses: vmactions/freebsd-vm@v1.2.9 with: @@ -194,6 +180,7 @@ jobs: set +e cd "${WORKSPACE}" unset FAULT + export CARGO_INCREMENTAL=0 export RUSTFLAGS="-C strip=symbols" # for disk space cargo build || FAULT=1 export PATH=~/.cargo/bin:${PATH} diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 3b1515e6924..4b5ac5e3561 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -58,15 +58,16 @@ jobs: - uses: actions/checkout@v6 with: persist-credentials: false - - uses: dtolnay/rust-toolchain@nightly - name: Install `cargo-fuzz` - run: cargo install cargo-fuzz + run: | + echo "RUSTC_BOOTSTRAP=1" >> "${GITHUB_ENV}" # Use -Z + cargo install cargo-fuzz --locked - uses: Swatinem/rust-cache@v2 with: shared-key: "cargo-fuzz-cache-key" cache-directories: "fuzz/target" - name: Run `cargo-fuzz build` - run: cargo +nightly fuzz build + run: cargo fuzz build fuzz-run: needs: fuzz-build @@ -92,18 +93,20 @@ jobs: - { name: fuzz_env, should_pass: false } - { name: fuzz_cksum, should_pass: false } - { name: fuzz_parse_glob, should_pass: true } - - { name: fuzz_parse_size, should_pass: true } - - { name: fuzz_parse_time, should_pass: true } + - { name: fuzz_parse_size, should_pass: false } + - { name: fuzz_parse_time, should_pass: false } - { name: fuzz_seq_parse_number, should_pass: false } - { name: fuzz_non_utf8_paths, should_pass: true } + - { name: fuzz_dirname, should_pass: true } steps: - uses: actions/checkout@v6 with: persist-credentials: false - - uses: dtolnay/rust-toolchain@nightly - name: Install `cargo-fuzz` - run: cargo install cargo-fuzz + run: | + echo "RUSTC_BOOTSTRAP=1" >> "${GITHUB_ENV}" # Use nightly + cargo install cargo-fuzz --locked - uses: Swatinem/rust-cache@v2 with: shared-key: "cargo-fuzz-cache-key" @@ -117,11 +120,11 @@ jobs: - name: Run ${{ matrix.test-target.name }} for XX seconds id: run_fuzzer shell: bash - continue-on-error: ${{ !matrix.test-target.name.should_pass }} + continue-on-error: ${{ !matrix.test-target.should_pass }} run: | mkdir -p fuzz/stats STATS_FILE="fuzz/stats/${{ matrix.test-target.name }}.txt" - cargo +nightly fuzz run ${{ matrix.test-target.name }} -- -max_total_time=${{ env.RUN_FOR }} -timeout=${{ env.RUN_FOR }} -detect_leaks=0 -print_final_stats=1 2>&1 | tee "$STATS_FILE" + cargo fuzz run ${{ matrix.test-target.name }} -- -max_total_time=${{ env.RUN_FOR }} -timeout=${{ env.RUN_FOR }} -detect_leaks=0 -print_final_stats=1 2>&1 | tee "$STATS_FILE" # Extract key stats from the output if grep -q "stat::number_of_executed_units" "$STATS_FILE"; then @@ -155,7 +158,7 @@ jobs: echo "Runs: $(grep -q "stat::number_of_executed_units" "$STATS_FILE" && grep "stat::number_of_executed_units" "$STATS_FILE" | awk '{print $2}' || echo "unknown")" echo "Execution Rate: $(grep -q "stat::average_exec_per_sec" "$STATS_FILE" && grep "stat::average_exec_per_sec" "$STATS_FILE" | awk '{print $2}' || echo "unknown") execs/sec" echo "New Units: $(grep -q "stat::new_units_added" "$STATS_FILE" && grep "stat::new_units_added" "$STATS_FILE" | awk '{print $2}' || echo "unknown")" - echo "Expected: ${{ matrix.test-target.name.should_pass }}" + echo "Expected: ${{ matrix.test-target.should_pass }}" if grep -q "SUMMARY: " "$STATS_FILE"; then echo "Status: $(grep "SUMMARY: " "$STATS_FILE" | head -1)" else diff --git a/.github/workflows/l10n.yml b/.github/workflows/l10n.yml index 3ccbda7761e..e9343b21138 100644 --- a/.github/workflows/l10n.yml +++ b/.github/workflows/l10n.yml @@ -28,6 +28,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -130,6 +131,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 steps: - uses: actions/checkout@v6 with: @@ -300,6 +302,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 steps: - uses: actions/checkout@v6 with: @@ -409,6 +412,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -560,6 +564,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -899,6 +904,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 steps: - uses: actions/checkout@v6 with: @@ -1130,6 +1136,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 steps: - uses: actions/checkout@v6 with: @@ -1251,6 +1258,7 @@ jobs: env: SCCACHE_GHA_ENABLED: "true" RUSTC_WRAPPER: "sccache" + CARGO_INCREMENTAL: 0 steps: - uses: actions/checkout@v6 with: diff --git a/.github/workflows/openbsd.yml b/.github/workflows/openbsd.yml index 7a14240c828..dea831b108c 100644 --- a/.github/workflows/openbsd.yml +++ b/.github/workflows/openbsd.yml @@ -47,10 +47,10 @@ jobs: prepare: | # Clean up disk space before installing packages df -h - rm -rf /usr/share/relink/* /usr/X11R6/* /usr/share/doc/* /usr/share/man/* || : pkg_add curl sudo-- jq coreutils bash rust rust-clippy rust-rustfmt llvm-- + rm -rf /usr/share/relink/* /usr/X11R6/* /usr/share/doc/* /usr/share/man/* & # Clean up package cache after installation - pkg_delete -a || true + pkg_delete -a & df -h run: | ## Prepare, build, and test @@ -58,6 +58,7 @@ jobs: # * NOTE: All steps need to be run in this block, otherwise, we are operating back on the mac host set -e # + export CARGO_INCREMENTAL=0 TEST_USER=tester REPO_NAME=${GITHUB_WORKSPACE##*/} WORKSPACE_PARENT="/home/runner/work/${REPO_NAME}" @@ -137,15 +138,15 @@ jobs: usesh: true sync: rsync copyback: false - mem: 4096 + mem: 6144 # Install rust and build dependencies from OpenBSD packages (llvm provides libclang for bindgen) prepare: | # Clean up disk space before installing packages df -h - rm -rf /usr/share/relink/* /usr/X11R6/* /usr/share/doc/* /usr/share/man/* || : + rm -rf /usr/share/relink/* /usr/X11R6/* /usr/share/doc/* /usr/share/man/* & pkg_add curl gmake sudo-- jq rust llvm-- # Clean up package cache after installation - pkg_delete -a || : + pkg_delete -a & df -h run: | ## Prepare, build, and test @@ -153,6 +154,7 @@ jobs: # * NOTE: All steps need to be run in this block, otherwise, we are operating back on the mac host set -e # + export CARGO_INCREMENTAL=0 TEST_USER=tester REPO_NAME=${GITHUB_WORKSPACE##*/} WORKSPACE_PARENT="/home/runner/work/${REPO_NAME}" @@ -194,7 +196,7 @@ jobs: set +e cd "${WORKSPACE}" unset FAULT - cargo build || FAULT=1 + # openbsd is very slow. Omit duplicated cargo build and do test only export PATH=~/.cargo/bin:${PATH} export RUST_BACKTRACE=1 export CARGO_TERM_COLOR=always @@ -208,7 +210,7 @@ jobs: cargo test --features "\$UUCORE_FEATURES" -p uucore || FAULT=1 fi # Test building with make - if (test -z "\$FAULT"); then make || FAULT=1 ; fi + if (test -z "\$FAULT"); then make MULTICALL=Y || FAULT=1 ; fi # Clean to avoid to rsync back the files and free up disk space cargo clean # Additional cleanup to free disk space diff --git a/.github/workflows/wsl2.yml b/.github/workflows/wsl2.yml index 1764a03fcd7..607a80e1d06 100644 --- a/.github/workflows/wsl2.yml +++ b/.github/workflows/wsl2.yml @@ -66,4 +66,5 @@ jobs: . "$HOME/.cargo/env" export CARGO_TERM_COLOR=always export RUST_BACKTRACE=1 + CARGO_INCREMENTAL=0 cargo nextest run --hide-progress-bar --profile ci --features '${{ matrix.job.features }}' diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index fd13529319d..0eb8b360673 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -55,6 +55,7 @@ fileio filesystem filesystems flamegraph +footgun freeram fsxattr fullblock @@ -93,6 +94,7 @@ mergeable microbenchmark microbenchmarks microbenchmarking +monomorphized multibyte multicall nmerge @@ -107,6 +109,7 @@ nolinks nonblock nonportable nonprinting +nonrepeating nonseekable notrunc nowrite @@ -114,8 +117,10 @@ noxfer ofile oflag oflags +pdeathsig peekable performant +prctl precompiled precompute preload @@ -140,8 +145,17 @@ SETFL setlocale shortcode shortcodes +setpgid sigaction +CHLD +chld +SIGCHLD +sigchld siginfo +SIGTTIN +sigttin +SIGTTOU +sigttou sigusr strcasecmp subcommand @@ -224,3 +238,8 @@ ENOTSUP enotsup SETFL tmpfs + +Hijri +Nowruz +charmap +hijri diff --git a/.vscode/cspell.dictionaries/people.wordlist.txt b/.vscode/cspell.dictionaries/people.wordlist.txt index 8fe38d88538..446c00df4b6 100644 --- a/.vscode/cspell.dictionaries/people.wordlist.txt +++ b/.vscode/cspell.dictionaries/people.wordlist.txt @@ -37,6 +37,9 @@ Boden Garman Chirag B Jadwani Chirag Jadwani +Daniel Lemire + Daniel + Lemire Derek Chiang Derek Chiang diff --git a/.vscode/cspell.dictionaries/workspace.wordlist.txt b/.vscode/cspell.dictionaries/workspace.wordlist.txt index 28c468d4f9c..30d2bd3e04b 100644 --- a/.vscode/cspell.dictionaries/workspace.wordlist.txt +++ b/.vscode/cspell.dictionaries/workspace.wordlist.txt @@ -38,6 +38,7 @@ getrandom globset indicatif itertools +itoa iuse langid lscolors diff --git a/Cargo.lock b/Cargo.lock index be10693f69e..17c05da50f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -272,6 +272,16 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "calendrical_calculations" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a0b39595c6ee54a8d0900204ba4c401d0ab4eb45adaf07178e8d017541529e7" +dependencies = [ + "core_maths", + "displaydoc", +] + [[package]] name = "cc" version = "1.2.52" @@ -327,18 +337,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.54" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +checksum = "3e34525d5bbbd55da2bb745d34b36121baac88d07619a9a09cfcf4a6c0832785" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.54" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +checksum = "59a20016a20a3da95bef50ec7238dbd09baeef4311dcdd38ec15aba69812fb61" dependencies = [ "anstream", "anstyle", @@ -374,9 +384,9 @@ dependencies = [ [[package]] name = "codspeed" -version = "4.2.1" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0d98d97fd75ca4489a1a0997820a6521531085e7c8a98941bd0e1264d567dd" +checksum = "38c2eb3388ebe26b5a0ab6bf4969d9c4840143d7f6df07caa3cc851b0606cef6" dependencies = [ "anyhow", "cc", @@ -392,9 +402,9 @@ dependencies = [ [[package]] name = "codspeed-divan-compat" -version = "4.2.1" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4179ec5518e79efcd02ed50aa483ff807902e43c85146e87fff58b9cffc06078" +checksum = "b2de65b7489a59709724d489070c6d05b7744039e4bf751d0a2006b90bb5593d" dependencies = [ "clap", "codspeed", @@ -405,9 +415,9 @@ dependencies = [ [[package]] name = "codspeed-divan-compat-macros" -version = "4.2.1" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15eaee97aa5bceb32cc683fe25cd6373b7fc48baee5c12471996b58b6ddf0d7c" +checksum = "56ca01ce4fd22b8dcc6c770dcd6b74343642e842482b94e8920d14e10c57638d" dependencies = [ "divan-macros", "itertools 0.14.0", @@ -419,9 +429,9 @@ dependencies = [ [[package]] name = "codspeed-divan-compat-walltime" -version = "4.2.1" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c38671153aa73be075d6019cab5ab1e6b31d36644067c1ac4cef73bf9723ce33" +checksum = "720ab9d0714718afe5f5832be6e5f5eb5ce97836e24ca7bf7042eea4308b9fb8" dependencies = [ "cfg-if", "clap", @@ -499,25 +509,26 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" -[[package]] -name = "convert_case" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core_maths" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77745e017f5edba1a9c1d854f6f3a52dac8a12dd5af5d2f54aecf61e43d80d30" +dependencies = [ + "libm", +] + [[package]] name = "coreutils" version = "0.6.0" dependencies = [ + "bytecount", "clap", "clap_complete", "clap_mangen", @@ -538,13 +549,14 @@ dependencies = [ "rlimit", "rstest", "selinux", - "serde", "sha1", "tempfile", "textwrap", "time", + "unicode-width 0.2.2", "unindent", "uu_arch", + "uu_b2sum", "uu_base32", "uu_base64", "uu_basename", @@ -588,6 +600,7 @@ dependencies = [ "uu_ln", "uu_logname", "uu_ls", + "uu_md5sum", "uu_mkdir", "uu_mkfifo", "uu_mknod", @@ -614,6 +627,11 @@ dependencies = [ "uu_rmdir", "uu_runcon", "uu_seq", + "uu_sha1sum", + "uu_sha224sum", + "uu_sha256sum", + "uu_sha384sum", + "uu_sha512sum", "uu_shred", "uu_shuf", "uu_sleep", @@ -651,7 +669,6 @@ dependencies = [ "walkdir", "wincode", "wincode-derive", - "xattr", "zip", ] @@ -733,7 +750,6 @@ checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" dependencies = [ "bitflags 2.10.0", "crossterm_winapi", - "derive_more", "document-features", "filedescriptor", "mio", @@ -866,28 +882,6 @@ dependencies = [ "powerfmt", ] -[[package]] -name = "derive_more" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" -dependencies = [ - "derive_more-impl", -] - -[[package]] -name = "derive_more-impl" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" -dependencies = [ - "convert_case", - "proc-macro2", - "quote", - "rustc_version", - "syn", -] - [[package]] name = "diff" version = "0.1.13" @@ -1014,7 +1008,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -1348,6 +1342,29 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_calendar" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f0e52e009b6b16ba9c0693578796f2dd4aaa59a7f8f920423706714a89ac4e" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "ixdtf", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_calendar_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527f04223b17edfe0bd43baf14a0cb1b017830db65f3950dc00224860a9a446d" + [[package]] name = "icu_collator" version = "2.1.1" @@ -1386,6 +1403,35 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_datetime" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9d49f41ded8e63761b6b4c3120dfdc289415a1ed10107db6198eb311057ca5" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_calendar", + "icu_datetime_data", + "icu_decimal", + "icu_locale", + "icu_locale_core", + "icu_pattern", + "icu_plurals", + "icu_provider", + "icu_time", + "potential_utf", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_datetime_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46597233625417b7c8052a63d916e4fdc73df21614ac0b679492a5d6e3b01aeb" + [[package]] name = "icu_decimal" version = "2.1.1" @@ -1465,6 +1511,38 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +[[package]] +name = "icu_pattern" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a7ff8c0ff6f61cdce299dcb54f557b0a251adbc78f6f0c35a21332c452b4a1b" +dependencies = [ + "displaydoc", + "either", + "serde", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_plurals" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f9cfe49f5b1d1163cc58db451562339916a9ca5cbcaae83924d41a0bf839474" +dependencies = [ + "fixed_decimal", + "icu_locale", + "icu_plurals_data", + "icu_provider", + "zerovec", +] + +[[package]] +name = "icu_plurals_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f018a98dccf7f0eb02ba06ac0ff67d102d8ded80734724305e924de304e12ff0" + [[package]] name = "icu_properties" version = "2.1.2" @@ -1502,6 +1580,30 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_time" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8242b00da3b3b6678f731437a11c8833a43c821ae081eca60ba1b7579d45b6d8" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar", + "icu_locale_core", + "icu_provider", + "icu_time_data", + "ixdtf", + "serde", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_time_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e10b0e5e87a2c84bd5fa407705732052edebe69291d347d0c3033785470edbf" + [[package]] name = "ident_case" version = "1.0.1" @@ -1600,6 +1702,12 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "ixdtf" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84de9d95a6d2547d9b77ee3f25fa0ee32e3c3a6484d47a55adebc0439c077992" + [[package]] name = "jiff" version = "0.2.18" @@ -1612,7 +1720,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -1895,7 +2003,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2216,9 +2324,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -2249,9 +2357,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -2485,7 +2593,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2641,9 +2749,9 @@ dependencies = [ [[package]] name = "signal-hook" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a37d01603c37b5466f808de79f845c7116049b0579adb70a6b7d47c1fa3a952" +checksum = "3b57709da74f9ff9f4a27dce9526eec25ca8407c45a7887243b031a58935fb8e" dependencies = [ "libc", "signal-hook-registry", @@ -2795,7 +2903,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2994,12 +3102,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" -[[package]] -name = "unicode-segmentation" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" - [[package]] name = "unicode-width" version = "0.1.14" @@ -3076,11 +3178,22 @@ dependencies = [ "uucore", ] +[[package]] +name = "uu_b2sum" +version = "0.6.0" +dependencies = [ + "clap", + "codspeed-divan-compat", + "fluent", + "tempfile", + "uu_checksum_common", + "uucore", +] + [[package]] name = "uu_base32" version = "0.6.0" dependencies = [ - "base64-simd", "clap", "fluent", "uucore", @@ -3145,6 +3258,17 @@ dependencies = [ "uucore", ] +[[package]] +name = "uu_checksum_common" +version = "0.6.0" +dependencies = [ + "clap", + "codspeed-divan-compat", + "fluent", + "tempfile", + "uucore", +] + [[package]] name = "uu_chgrp" version = "0.6.0" @@ -3190,7 +3314,7 @@ dependencies = [ "clap", "codspeed-divan-compat", "fluent", - "tempfile", + "uu_checksum_common", "uucore", ] @@ -3219,7 +3343,6 @@ dependencies = [ "thiserror 2.0.18", "uucore", "walkdir", - "xattr", ] [[package]] @@ -3242,7 +3365,6 @@ dependencies = [ "codspeed-divan-compat", "fluent", "memchr", - "tempfile", "uucore", ] @@ -3253,6 +3375,8 @@ dependencies = [ "clap", "codspeed-divan-compat", "fluent", + "icu_calendar", + "icu_locale", "jiff", "nix", "parse_datetime", @@ -3271,7 +3395,7 @@ dependencies = [ "gcd", "libc", "nix", - "signal-hook 0.4.1", + "signal-hook 0.4.3", "tempfile", "thiserror 2.0.18", "uucore", @@ -3388,7 +3512,6 @@ dependencies = [ "num-bigint", "num-prime", "num-traits", - "rand 0.9.2", "uucore", ] @@ -3439,9 +3562,7 @@ name = "uu_hashsum" version = "0.6.0" dependencies = [ "clap", - "codspeed-divan-compat", "fluent", - "tempfile", "uucore", ] @@ -3573,6 +3694,18 @@ dependencies = [ "uutils_term_grid", ] +[[package]] +name = "uu_md5sum" +version = "0.6.0" +dependencies = [ + "clap", + "codspeed-divan-compat", + "fluent", + "tempfile", + "uu_checksum_common", + "uucore", +] + [[package]] name = "uu_mkdir" version = "0.6.0" @@ -3588,7 +3721,6 @@ version = "0.6.0" dependencies = [ "clap", "fluent", - "libc", "nix", "uucore", ] @@ -3622,7 +3754,6 @@ dependencies = [ "clap", "crossterm", "fluent", - "nix", "tempfile", "uucore", ] @@ -3694,7 +3825,6 @@ dependencies = [ "clap", "codspeed-divan-compat", "fluent", - "tempfile", "thiserror 2.0.18", "uucore", ] @@ -3855,11 +3985,70 @@ dependencies = [ "fluent", "num-bigint", "num-traits", - "tempfile", "thiserror 2.0.18", "uucore", ] +[[package]] +name = "uu_sha1sum" +version = "0.6.0" +dependencies = [ + "clap", + "codspeed-divan-compat", + "fluent", + "tempfile", + "uu_checksum_common", + "uucore", +] + +[[package]] +name = "uu_sha224sum" +version = "0.6.0" +dependencies = [ + "clap", + "codspeed-divan-compat", + "fluent", + "tempfile", + "uu_checksum_common", + "uucore", +] + +[[package]] +name = "uu_sha256sum" +version = "0.6.0" +dependencies = [ + "clap", + "codspeed-divan-compat", + "fluent", + "tempfile", + "uu_checksum_common", + "uucore", +] + +[[package]] +name = "uu_sha384sum" +version = "0.6.0" +dependencies = [ + "clap", + "codspeed-divan-compat", + "fluent", + "tempfile", + "uu_checksum_common", + "uucore", +] + +[[package]] +name = "uu_sha512sum" +version = "0.6.0" +dependencies = [ + "clap", + "codspeed-divan-compat", + "fluent", + "tempfile", + "uu_checksum_common", + "uucore", +] + [[package]] name = "uu_shred" version = "0.6.0" @@ -3878,9 +4067,11 @@ dependencies = [ "clap", "codspeed-divan-compat", "fluent", + "itoa", "rand 0.9.2", + "rand_chacha 0.9.0", "rand_core 0.9.5", - "tempfile", + "sha3", "uucore", ] @@ -3913,7 +4104,6 @@ dependencies = [ "self_cell", "tempfile", "thiserror 2.0.18", - "unicode-width 0.2.2", "uucore", ] @@ -3964,6 +4154,7 @@ dependencies = [ name = "uu_stty" version = "0.6.0" dependencies = [ + "cfg_aliases", "clap", "fluent", "nix", @@ -4018,7 +4209,6 @@ dependencies = [ "rstest", "same-file", "uucore", - "winapi-util", "windows-sys 0.61.2", ] @@ -4106,7 +4296,6 @@ dependencies = [ "fluent", "nix", "string-interner", - "tempfile", "thiserror 2.0.18", "uucore", ] @@ -4140,7 +4329,6 @@ dependencies = [ "fluent", "tempfile", "thiserror 2.0.18", - "unicode-width 0.2.2", "uucore", ] @@ -4151,7 +4339,6 @@ dependencies = [ "clap", "codspeed-divan-compat", "fluent", - "tempfile", "uucore", ] @@ -4172,7 +4359,6 @@ dependencies = [ "fluent", "jiff", "thiserror 2.0.18", - "utmp-classic", "uucore", ] @@ -4237,7 +4423,6 @@ dependencies = [ "clap", "fluent", "itertools 0.14.0", - "nix", "uucore", ] @@ -4263,7 +4448,9 @@ dependencies = [ "fluent-syntax", "glob", "hex", + "icu_calendar", "icu_collator", + "icu_datetime", "icu_decimal", "icu_locale", "icu_provider", @@ -4275,7 +4462,6 @@ dependencies = [ "nix", "num-traits", "os_display", - "phf", "procfs", "selinux", "sha1", @@ -4463,7 +4649,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4736,6 +4922,9 @@ name = "writeable" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +dependencies = [ + "either", +] [[package]] name = "wyz" diff --git a/Cargo.toml b/Cargo.toml index e1f7083cdf5..12e062f8f12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -86,6 +86,13 @@ feat_common_core = [ "basenc", "cat", "cksum", + "b2sum", + "md5sum", + "sha1sum", + "sha224sum", + "sha256sum", + "sha384sum", + "sha512sum", "comm", "cp", "csplit", @@ -154,7 +161,6 @@ feat_common_core = [ # "feat_Tier1" == expanded set of utilities which can be built/run on the usual rust "Tier 1" target platforms (ref: ) feat_Tier1 = [ "feat_common_core", - # "arch", "hostname", "nproc", @@ -315,7 +321,7 @@ clap = { version = "4.5", features = ["wrap_help", "cargo", "color"] } clap_complete = "4.4" clap_mangen = "0.2" compare = "0.1.0" -crossterm = "0.29.0" +crossterm = { version = "0.29.0", default-features = false } ctor = "0.6.0" ctrlc = { version = "3.4.7", features = ["termination"] } divan = { package = "codspeed-divan-compat", version = "4.0.5" } @@ -330,12 +336,15 @@ gcd = "2.3" glob = "0.3.1" half = "2.4.1" hostname = "0.4" +icu_calendar = "2.0.0" icu_collator = "2.0.0" +icu_datetime = "2.0.0" icu_decimal = "2.0.0" icu_locale = "2.0.0" icu_provider = "2.0.0" indicatif = "0.18.0" itertools = "0.14.0" +itoa = "1.0.15" jiff = "0.2.18" libc = "0.2.172" lscolors = { version = "0.21.0", default-features = false, features = [ @@ -356,9 +365,11 @@ phf_codegen = "0.13.1" platform-info = "2.0.3" procfs = "0.18" rand = { version = "0.9.0", features = ["small_rng"] } +rand_chacha = { version = "0.9.0" } rand_core = "0.9.0" rayon = "1.10" regex = "1.10.4" +rlimit = "0.10.1" rstest = "0.26.0" rust-ini = "0.21.0" same-file = "1.0.6" @@ -403,6 +414,7 @@ uucore = { version = "0.6.0", package = "uucore", path = "src/uucore" } uucore_procs = { version = "0.6.0", package = "uucore_procs", path = "src/uucore_procs" } uu_ls = { version = "0.6.0", path = "src/uu/ls" } uu_base32 = { version = "0.6.0", path = "src/uu/base32" } +uu_checksum_common = { version = "0.6.0", path = "src/uu/checksum_common" } uutests = { version = "0.6.0", package = "uutests", path = "tests/uutests" } [dependencies] @@ -432,6 +444,13 @@ chmod = { optional = true, version = "0.6.0", package = "uu_chmod", path = "src/ chown = { optional = true, version = "0.6.0", package = "uu_chown", path = "src/uu/chown" } chroot = { optional = true, version = "0.6.0", package = "uu_chroot", path = "src/uu/chroot" } cksum = { optional = true, version = "0.6.0", package = "uu_cksum", path = "src/uu/cksum" } +b2sum = { optional = true, version = "0.6.0", package = "uu_b2sum", path = "src/uu/b2sum" } +md5sum = { optional = true, version = "0.6.0", package = "uu_md5sum", path = "src/uu/md5sum" } +sha1sum = { optional = true, version = "0.6.0", package = "uu_sha1sum", path = "src/uu/sha1sum" } +sha224sum = { optional = true, version = "0.6.0", package = "uu_sha224sum", path = "src/uu/sha224sum" } +sha256sum = { optional = true, version = "0.6.0", package = "uu_sha256sum", path = "src/uu/sha256sum" } +sha384sum = { optional = true, version = "0.6.0", package = "uu_sha384sum", path = "src/uu/sha384sum" } +sha512sum = { optional = true, version = "0.6.0", package = "uu_sha512sum", path = "src/uu/sha512sum" } comm = { optional = true, version = "0.6.0", package = "uu_comm", path = "src/uu/comm" } cp = { optional = true, version = "0.6.0", package = "uu_cp", path = "src/uu/cp" } csplit = { optional = true, version = "0.6.0", package = "uu_csplit", path = "src/uu/csplit" } @@ -536,6 +555,7 @@ filetime.workspace = true glob.workspace = true jiff.workspace = true libc.workspace = true +bytecount.workspace = true num-prime.workspace = true pretty_assertions = "1.4.0" rand.workspace = true @@ -543,6 +563,7 @@ regex.workspace = true sha1 = { workspace = true, features = ["std"] } tempfile.workspace = true time = { workspace = true, features = ["local-offset"] } +unicode-width.workspace = true unindent = "0.2.3" uutests.workspace = true uucore = { workspace = true, features = [ @@ -562,16 +583,14 @@ nix = { workspace = true, features = [ "process", "signal", "socket", - "user", "term", + "user", ] } -rlimit = "0.10.1" -xattr.workspace = true +rlimit = { workspace = true } # Used in test_uptime::test_uptime_with_file_containing_valid_boot_time_utmpx_record # to deserialize an utmpx struct into a binary file [target.'cfg(all(target_family= "unix",not(target_os = "macos")))'.dev-dependencies] -serde = { version = "1.0.202", features = ["derive"] } wincode = "0.2.5" wincode-derive = "0.2.3" @@ -678,3 +697,6 @@ format_push_string = "allow" flat_map_option = "allow" from_iter_instead_of_collect = "allow" large_types_passed_by_value = "allow" + +[workspace.metadata.cargo-shear] +ignored = ["fluent", "libstdbuf"] diff --git a/GNUmakefile b/GNUmakefile index fa824d62680..b1c8ff2dce1 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -78,106 +78,15 @@ endif LN ?= ln -sf # Possible programs -PROGS := \ - arch \ - base32 \ - base64 \ - basenc \ - basename \ - cat \ - cksum \ - comm \ - cp \ - csplit \ - cut \ - date \ - dd \ - df \ - dir \ - dircolors \ - dirname \ - du \ - echo \ - env \ - expand \ - expr \ - factor \ - false \ - fmt \ - fold \ - hashsum \ - head \ - hostname \ - join \ - link \ - ln \ - ls \ - mkdir \ - mktemp \ - more \ - mv \ - nl \ - numfmt \ - nproc \ - od \ - paste \ - pr \ - printenv \ - printf \ - ptx \ - pwd \ - readlink \ - realpath \ - rm \ - rmdir \ - seq \ - shred \ - shuf \ - sleep \ - sort \ - split \ - sum \ - sync \ - tac \ - tail \ - tee \ - test \ - touch \ - tr \ - true \ - truncate \ - tsort \ - uname \ - unexpand \ - uniq \ - unlink \ - vdir \ - wc \ - whoami \ - yes +PROGS := \ + $(shell sed -n '/feat_Tier1 = \[/,/\]/p' Cargo.toml | sed '1d;2d' |tr -d '],"\n')\ + $(shell sed -n '/feat_common_core = \[/,/\]/p' Cargo.toml | sed '1d' |tr -d '],"\n') UNIX_PROGS := \ - chgrp \ - chmod \ - chown \ - chroot \ - groups \ + $(shell sed -n '/feat_require_unix_core = \[/,/\]/p' Cargo.toml | sed '1d' |tr -d '],"\n') \ hostid \ - id \ - install \ - kill \ - logname \ - mkfifo \ - mknod \ - nice \ - nohup \ - pathchk \ pinky \ - stat \ stdbuf \ - stty \ - timeout \ - tty \ uptime \ users \ who @@ -186,15 +95,6 @@ SELINUX_PROGS := \ chcon \ runcon -HASHSUM_PROGS := \ - b2sum \ - md5sum \ - sha1sum \ - sha224sum \ - sha256sum \ - sha384sum \ - sha512sum - $(info Detected OS = $(OS)) ifeq (,$(findstring MINGW,$(OS))) @@ -215,78 +115,9 @@ ifneq ($(findstring stdbuf,$(UTILS)),) endif # Programs with usable tests -TEST_PROGS := \ - base32 \ - base64 \ - basename \ - cat \ - chcon \ - chgrp \ - chmod \ - chown \ - cksum \ - comm \ - cp \ - csplit \ - cut \ - date \ - dircolors \ - dirname \ - echo \ - env \ - expr \ - factor \ - false \ - fold \ - hashsum \ - head \ - install \ - link \ - ln \ - ls \ - mkdir \ - mktemp \ - mv \ - nl \ - numfmt \ - od \ - paste \ - pathchk \ - pinky \ - pr \ - printf \ - ptx \ - pwd \ - readlink \ - realpath \ - rm \ - rmdir \ - runcon \ - seq \ - sleep \ - sort \ - split \ - stat \ - stdbuf \ - sum \ - tac \ - tail \ - test \ - touch \ - tr \ - true \ - truncate \ - tsort \ - uname \ - unexpand \ - uniq \ - unlink \ - uudoc \ - wc \ - who TESTS := \ - $(sort $(filter $(UTILS),$(TEST_PROGS))) + $(sort $(filter $(UTILS),$(PROGS) $(UNIX_PROGS) $(SELINUX_PROGS))) TEST_NO_FAIL_FAST := TEST_SPEC_FEATURE := diff --git a/README.package.md b/README.package.md index 355b153db28..ebf7724f638 100644 --- a/README.package.md +++ b/README.package.md @@ -14,7 +14,7 @@ [![dependency status](https://deps.rs/repo/github/uutils/coreutils/status.svg)](https://deps.rs/repo/github/uutils/coreutils) [![CodeCov](https://codecov.io/gh/uutils/coreutils/branch/master/graph/badge.svg)](https://codecov.io/gh/uutils/coreutils) -![MSRV](https://img.shields.io/badge/MSRV-1.70.0-brightgreen) +![MSRV](https://img.shields.io/badge/MSRV-1.85.0-brightgreen) diff --git a/build.rs b/build.rs index aabd968329b..a7eb9031225 100644 --- a/build.rs +++ b/build.rs @@ -89,15 +89,6 @@ pub fn main() { } "hashsum" => { phf_map.entry(krate, format!("({krate}::uumain, {krate}::uu_app_custom)")); - - let map_value = format!("({krate}::uumain, {krate}::uu_app_common)"); - phf_map.entry("md5sum", map_value.clone()); - phf_map.entry("sha1sum", map_value.clone()); - phf_map.entry("sha224sum", map_value.clone()); - phf_map.entry("sha256sum", map_value.clone()); - phf_map.entry("sha384sum", map_value.clone()); - phf_map.entry("sha512sum", map_value.clone()); - phf_map.entry("b2sum", map_value.clone()); } _ => { phf_map.entry(krate, map_value.clone()); diff --git a/fuzz/.cargo/config.toml b/fuzz/.cargo/config.toml new file mode 100644 index 00000000000..5d1a2a27fca --- /dev/null +++ b/fuzz/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["--cfg", "fuzzing"] diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 1ec35d31418..62bd4b15749 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -194,6 +194,16 @@ version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" +[[package]] +name = "calendrical_calculations" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a0b39595c6ee54a8d0900204ba4c401d0ab4eb45adaf07178e8d017541529e7" +dependencies = [ + "core_maths", + "displaydoc", +] + [[package]] name = "cc" version = "1.2.51" @@ -314,6 +324,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core_maths" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77745e017f5edba1a9c1d854f6f3a52dac8a12dd5af5d2f54aecf61e43d80d30" +dependencies = [ + "libm", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -649,6 +668,29 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_calendar" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f0e52e009b6b16ba9c0693578796f2dd4aaa59a7f8f920423706714a89ac4e" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "ixdtf", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_calendar_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527f04223b17edfe0bd43baf14a0cb1b017830db65f3950dc00224860a9a446d" + [[package]] name = "icu_collator" version = "2.1.1" @@ -687,6 +729,35 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_datetime" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9d49f41ded8e63761b6b4c3120dfdc289415a1ed10107db6198eb311057ca5" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_calendar", + "icu_datetime_data", + "icu_decimal", + "icu_locale", + "icu_locale_core", + "icu_pattern", + "icu_plurals", + "icu_provider", + "icu_time", + "potential_utf", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_datetime_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46597233625417b7c8052a63d916e4fdc73df21614ac0b679492a5d6e3b01aeb" + [[package]] name = "icu_decimal" version = "2.1.1" @@ -766,6 +837,38 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +[[package]] +name = "icu_pattern" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a7ff8c0ff6f61cdce299dcb54f557b0a251adbc78f6f0c35a21332c452b4a1b" +dependencies = [ + "displaydoc", + "either", + "serde", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_plurals" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f9cfe49f5b1d1163cc58db451562339916a9ca5cbcaae83924d41a0bf839474" +dependencies = [ + "fixed_decimal", + "icu_locale", + "icu_plurals_data", + "icu_provider", + "zerovec", +] + +[[package]] +name = "icu_plurals_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f018a98dccf7f0eb02ba06ac0ff67d102d8ded80734724305e924de304e12ff0" + [[package]] name = "icu_properties" version = "2.1.2" @@ -803,6 +906,30 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_time" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8242b00da3b3b6678f731437a11c8833a43c821ae081eca60ba1b7579d45b6d8" +dependencies = [ + "calendrical_calculations", + "displaydoc", + "icu_calendar", + "icu_locale_core", + "icu_provider", + "icu_time_data", + "ixdtf", + "serde", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_time_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e10b0e5e87a2c84bd5fa407705732052edebe69291d347d0c3033785470edbf" + [[package]] name = "intl-memoizer" version = "0.5.3" @@ -837,6 +964,12 @@ dependencies = [ "either", ] +[[package]] +name = "ixdtf" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84de9d95a6d2547d9b77ee3f25fa0ee32e3c3a6484d47a55adebc0439c077992" + [[package]] name = "jiff" version = "0.2.18" @@ -1107,25 +1240,6 @@ dependencies = [ "winnow", ] -[[package]] -name = "phf" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" -dependencies = [ - "phf_shared", - "serde", -] - -[[package]] -name = "phf_shared" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" -dependencies = [ - "siphasher", -] - [[package]] name = "pkg-config" version = "0.3.32" @@ -1391,12 +1505,6 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - [[package]] name = "sm3" version = "0.4.2" @@ -1584,12 +1692,22 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uu_checksum_common" +version = "0.6.0" +dependencies = [ + "clap", + "fluent", + "uucore", +] + [[package]] name = "uu_cksum" version = "0.6.0" dependencies = [ "clap", "fluent", + "uu_checksum_common", "uucore", ] @@ -1610,6 +1728,8 @@ version = "0.6.0" dependencies = [ "clap", "fluent", + "icu_calendar", + "icu_locale", "jiff", "nix", "parse_datetime", @@ -1617,6 +1737,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "uu_dirname" +version = "0.6.0" +dependencies = [ + "clap", + "fluent", + "uucore", +] + [[package]] name = "uu_echo" version = "0.6.0" @@ -1692,7 +1821,6 @@ dependencies = [ "self_cell", "tempfile", "thiserror", - "unicode-width", "uucore", ] @@ -1762,7 +1890,9 @@ dependencies = [ "fluent-syntax", "glob", "hex", + "icu_calendar", "icu_collator", + "icu_datetime", "icu_decimal", "icu_locale", "icu_provider", @@ -1773,7 +1903,6 @@ dependencies = [ "nix", "num-traits", "os_display", - "phf", "procfs", "sha1", "sha2", @@ -1798,6 +1927,7 @@ dependencies = [ "uu_cksum", "uu_cut", "uu_date", + "uu_dirname", "uu_echo", "uu_env", "uu_expr", @@ -2090,6 +2220,9 @@ name = "writeable" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +dependencies = [ + "either", +] [[package]] name = "yoke" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index d3c987f229a..6d5e2d4d602 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -41,6 +41,7 @@ uu_split = { path = "../src/uu/split" } uu_tr = { path = "../src/uu/tr" } uu_env = { path = "../src/uu/env" } uu_cksum = { path = "../src/uu/cksum" } +uu_dirname = { path = "../src/uu/dirname" } [[bin]] name = "fuzz_date" @@ -149,3 +150,9 @@ name = "fuzz_non_utf8_paths" path = "fuzz_targets/fuzz_non_utf8_paths.rs" test = false doc = false + +[[bin]] +name = "fuzz_dirname" +path = "fuzz_targets/fuzz_dirname.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/fuzz_dirname.rs b/fuzz/fuzz_targets/fuzz_dirname.rs new file mode 100644 index 00000000000..bfb127a5a5a --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_dirname.rs @@ -0,0 +1,211 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +#![no_main] +use libfuzzer_sys::fuzz_target; +use uu_dirname::uumain; + +use rand::Rng; +use rand::prelude::IndexedRandom; +use std::ffi::OsString; + +use uufuzz::CommandResult; +use uufuzz::{compare_result, generate_and_run_uumain, generate_random_string, run_gnu_cmd}; + +static CMD_PATH: &str = "dirname"; + +fn generate_dirname_args() -> Vec { + let mut rng = rand::rng(); + let mut args = Vec::new(); + + // 20% chance to include -z/--zero flag + if rng.random_bool(0.2) { + if rng.random_bool(0.5) { + args.push("-z".to_string()); + } else { + args.push("--zero".to_string()); + } + } + + // 30% chance to use one of the specific issue #8924 cases + if rng.random_bool(0.3) { + let issue_cases = [ + "foo//.", + "foo/./", + "foo/bar/./", + "bar//.", + "test/./", + "a/b/./", + "x//.", + "dir/subdir/./", + ]; + args.push(issue_cases.choose(&mut rng).unwrap().to_string()); + } else { + // Generate 1-3 path arguments normally + let num_paths = rng.random_range(1..=3); + for _ in 0..num_paths { + args.push(generate_path()); + } + } + + args +} + +fn generate_path() -> String { + let mut rng = rand::rng(); + + // Different types of paths to test + let path_type = rng.random_range(0..15); + + match path_type { + // Simple paths + 0 => generate_random_string(rng.random_range(1..=20)), + + // Paths with slashes + 1 => { + let mut path = String::new(); + let components = rng.random_range(1..=5); + for i in 0..components { + if i > 0 { + path.push('/'); + } + path.push_str(&generate_random_string(rng.random_range(1..=10))); + } + path + } + + // Root path + 2 => "/".to_string(), + + // Absolute paths + 3 => { + let mut path = "/".to_string(); + let components = rng.random_range(1..=4); + for _ in 0..components { + path.push_str(&generate_random_string(rng.random_range(1..=8))); + path.push('/'); + } + // Remove trailing slash sometimes + if rng.random_bool(0.5) && path.len() > 1 { + path.pop(); + } + path + } + + // Paths ending with "/." (specific case from issue #8924) + 4 => { + let base = if rng.random_bool(0.3) { + "/".to_string() + } else { + format!("/{}", generate_random_string(rng.random_range(1..=10))) + }; + format!("{}.", base) + } + + // Paths with multiple slashes + 5 => { + let base = generate_random_string(rng.random_range(1..=10)); + format!( + "///{}//{}", + base, + generate_random_string(rng.random_range(1..=8)) + ) + } + + // Paths with dots + 6 => { + let components = [".", "..", "...", "...."]; + let chosen = components.choose(&mut rng).unwrap(); + if rng.random_bool(0.5) { + format!("/{}", chosen) + } else { + chosen.to_string() + } + } + + // Single character paths + 7 => { + let chars = ['a', 'x', '1', '-', '_', '.']; + chars.choose(&mut rng).unwrap().to_string() + } + + // Empty string (edge case) + 8 => "".to_string(), + + // Issue #8924 specific cases: paths like "foo//." + 9 => { + let base = generate_random_string(rng.random_range(1..=10)); + format!("{}//.", base) + } + + // Issue #8924 specific cases: paths like "foo/./" + 10 => { + let base = generate_random_string(rng.random_range(1..=10)); + format!("{}/./", base) + } + + // Issue #8924 specific cases: paths like "foo/bar/./" + 11 => { + let base1 = generate_random_string(rng.random_range(1..=8)); + let base2 = generate_random_string(rng.random_range(1..=8)); + format!("{}/{}/./", base1, base2) + } + + // More complex patterns with ./ and multiple slashes + 12 => { + let base = generate_random_string(rng.random_range(1..=10)); + let patterns = ["/./", "//./", "//.//", "/.//"]; + let pattern = patterns.choose(&mut rng).unwrap(); + format!("{}{}", base, pattern) + } + + // Patterns with .. and multiple slashes + 13 => { + let base = generate_random_string(rng.random_range(1..=10)); + let patterns = ["/..", "//..", "/../", "//..//"]; + let pattern = patterns.choose(&mut rng).unwrap(); + format!("{}{}", base, pattern) + } + + // Complex paths with special cases + _ => { + let special_endings = [".", "..", "/.", "/..", "//", "/", "/./.", "//.", "./"]; + let base = generate_random_string(rng.random_range(1..=15)); + let ending = special_endings.choose(&mut rng).unwrap(); + format!("{}{}", base, ending) + } + } +} + +fuzz_target!(|_data: &[u8]| { + let dirname_args = generate_dirname_args(); + let mut args = vec![OsString::from("dirname")]; + args.extend(dirname_args.iter().map(OsString::from)); + + let rust_result = generate_and_run_uumain(&args, uumain, None); + + let gnu_result = match run_gnu_cmd(CMD_PATH, &args[1..], false, None) { + Ok(result) => result, + Err(error_result) => { + eprintln!("Failed to run GNU command:"); + eprintln!("Stderr: {}", error_result.stderr); + eprintln!("Exit Code: {}", error_result.exit_code); + CommandResult { + stdout: String::new(), + stderr: error_result.stderr, + exit_code: error_result.exit_code, + } + } + }; + + compare_result( + "dirname", + &format!("{:?}", &args[1..]), + None, + &rust_result, + &gnu_result, + false, + ); +}); diff --git a/fuzz/fuzz_targets/fuzz_non_utf8_paths.rs b/fuzz/fuzz_targets/fuzz_non_utf8_paths.rs index ac7480f3230..82e5374844b 100644 --- a/fuzz/fuzz_targets/fuzz_non_utf8_paths.rs +++ b/fuzz/fuzz_targets/fuzz_non_utf8_paths.rs @@ -14,7 +14,7 @@ use std::env::temp_dir; use std::ffi::{OsStr, OsString}; use std::fs; use std::os::unix::ffi::{OsStrExt, OsStringExt}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use uufuzz::{CommandResult, run_gnu_cmd}; // Programs that typically take file/path arguments and should be tested @@ -148,7 +148,7 @@ fn setup_test_files() -> Result<(PathBuf, Vec), std::io::Error> { // Try to create the file - this may fail on some filesystems if let Ok(mut file) = fs::File::create(&file_path) { use std::io::Write; - let _ = write!(file, "test content for file {}\n", i); + let _ = writeln!(file, "test content for file {}", i); test_files.push(file_path); } } @@ -156,7 +156,7 @@ fn setup_test_files() -> Result<(PathBuf, Vec), std::io::Error> { Ok((temp_root, test_files)) } -fn test_program_with_non_utf8_path(program: &str, path: &PathBuf) -> CommandResult { +fn test_program_with_non_utf8_path(program: &str, path: &Path) -> CommandResult { let path_os = path.as_os_str(); // Use the locally built uutils binary instead of system PATH diff --git a/fuzz/fuzz_targets/fuzz_test.rs b/fuzz/fuzz_targets/fuzz_test.rs index 894a1dcd56b..176ab9aba8f 100644 --- a/fuzz/fuzz_targets/fuzz_test.rs +++ b/fuzz/fuzz_targets/fuzz_test.rs @@ -135,9 +135,9 @@ fn generate_test_arg() -> String { if test_arg.arg_type == ArgType::INTEGER { arg.push_str(&format!( "{} {} {}", - rng.random_range(-100..=100).to_string(), + rng.random_range(-100..=100), test_arg.arg, - rng.random_range(-100..=100).to_string() + rng.random_range(-100..=100) )); } else if test_arg.arg_type == ArgType::STRINGSTRING { let random_str = generate_random_string(rng.random_range(1..=10)); diff --git a/src/common/validation.rs b/src/common/validation.rs index f3923adb872..d723ca9262a 100644 --- a/src/common/validation.rs +++ b/src/common/validation.rs @@ -50,11 +50,6 @@ fn get_canonical_util_name(util_name: &str) -> &str { // uu_test aliases - '[' is an alias for test "[" => "test", - // hashsum aliases - all these hash commands are aliases for hashsum - "md5sum" | "sha1sum" | "sha224sum" | "sha256sum" | "sha384sum" | "sha512sum" | "b2sum" => { - "hashsum" - } - "dir" => "ls", // dir is an alias for ls // Default case - return the util name as is @@ -85,7 +80,6 @@ mod tests { fn test_get_canonical_util_name() { // Test a few key aliases assert_eq!(get_canonical_util_name("["), "test"); - assert_eq!(get_canonical_util_name("md5sum"), "hashsum"); assert_eq!(get_canonical_util_name("dir"), "ls"); // Test passthrough case diff --git a/src/uu/arch/src/arch.rs b/src/uu/arch/src/arch.rs index 7d1867763f9..a01b608749f 100644 --- a/src/uu/arch/src/arch.rs +++ b/src/uu/arch/src/arch.rs @@ -3,9 +3,9 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -use platform_info::*; - use clap::Command; +use platform_info::*; +use std::io::{Write, stdout}; use uucore::error::{UResult, USimpleError}; use uucore::translate; @@ -16,7 +16,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let uts = PlatformInfo::new().map_err(|_e| USimpleError::new(1, translate!("cannot-get-system")))?; - println!("{}", uts.machine().to_string_lossy().trim()); + writeln!(stdout(), "{}", uts.machine().to_string_lossy().trim())?; Ok(()) } diff --git a/src/uu/b2sum/Cargo.toml b/src/uu/b2sum/Cargo.toml new file mode 100644 index 00000000000..61b0b702aef --- /dev/null +++ b/src/uu/b2sum/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "uu_b2sum" +description = "b2sum ~ (uutils) Print or check the BLAKE2b checksums" +repository = "https://github.com/uutils/coreutils/tree/main/src/uu/b2sum" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[lib] +path = "src/b2sum.rs" + +[dependencies] +clap = { workspace = true } +uu_checksum_common = { workspace = true } +uucore = { workspace = true, features = [ + "checksum", + "encoding", + "sum", + "hardware", +] } +fluent = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +tempfile = { workspace = true } +uucore = { workspace = true, features = ["benchmark"] } + +[[bin]] +name = "b2sum" +path = "src/main.rs" diff --git a/src/uu/b2sum/LICENSE b/src/uu/b2sum/LICENSE new file mode 120000 index 00000000000..5853aaea53b --- /dev/null +++ b/src/uu/b2sum/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/b2sum/locales/en-US.ftl b/src/uu/b2sum/locales/en-US.ftl new file mode 100644 index 00000000000..a5ab9ea7ebf --- /dev/null +++ b/src/uu/b2sum/locales/en-US.ftl @@ -0,0 +1,2 @@ +b2sum-about = Print or check the BLAKE2b checksums +b2sum-usage = b2sum [OPTIONS] [FILE]... diff --git a/src/uu/b2sum/locales/fr-FR.ftl b/src/uu/b2sum/locales/fr-FR.ftl new file mode 100644 index 00000000000..7cb93e5d8d9 --- /dev/null +++ b/src/uu/b2sum/locales/fr-FR.ftl @@ -0,0 +1,2 @@ +b2sum-about = Afficher le BLAKE2b et la taille de chaque fichier +b2sum-usage = b2sum [OPTION]... [FICHIER]... diff --git a/src/uu/b2sum/src/b2sum.rs b/src/uu/b2sum/src/b2sum.rs new file mode 100644 index 00000000000..502bd8b53be --- /dev/null +++ b/src/uu/b2sum/src/b2sum.rs @@ -0,0 +1,29 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore (ToDO) algo + +use clap::Command; + +use uu_checksum_common::{standalone_checksum_app_with_length, standalone_with_length_main}; + +use uucore::checksum::{AlgoKind, calculate_blake2b_length_str}; +use uucore::error::UResult; +use uucore::translate; + +#[uucore::main] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + standalone_with_length_main( + AlgoKind::Blake2b, + uu_app(), + args, + calculate_blake2b_length_str, + ) +} + +#[inline] +pub fn uu_app() -> Command { + standalone_checksum_app_with_length(translate!("b2sum-about"), translate!("b2sum-usage")) +} diff --git a/src/uu/b2sum/src/main.rs b/src/uu/b2sum/src/main.rs new file mode 100644 index 00000000000..422fa2fe709 --- /dev/null +++ b/src/uu/b2sum/src/main.rs @@ -0,0 +1 @@ +uucore::bin!(uu_b2sum); diff --git a/src/uu/base32/Cargo.toml b/src/uu/base32/Cargo.toml index fe51e6865c0..2318911b517 100644 --- a/src/uu/base32/Cargo.toml +++ b/src/uu/base32/Cargo.toml @@ -21,7 +21,6 @@ path = "src/base32.rs" clap = { workspace = true } uucore = { workspace = true, features = ["encoding"] } fluent = { workspace = true } -base64-simd = "0.8" [[bin]] name = "base32" diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index d14642bfc6d..b7fef0ac20d 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -8,7 +8,7 @@ use clap::{Arg, ArgAction, Command}; use std::ffi::OsString; use std::fs::File; -use std::io::{self, BufRead, BufReader, ErrorKind, Write}; +use std::io::{self, BufRead, BufReader, Write}; use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::encoding::{ @@ -16,7 +16,7 @@ use uucore::encoding::{ SupportsFastDecodeAndEncode, Z85Wrapper, for_base_common::{BASE32, BASE32HEX, BASE64URL, HEXUPPER_PERMISSIVE}, }; -use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; +use uucore::error::{FromIo, UResult, USimpleError, UUsageError, strip_errno}; use uucore::format_usage; use uucore::translate; @@ -179,7 +179,7 @@ pub fn handle_input(input: &mut R, format: Format, config: Config) - let mut buffered = Vec::new(); input .read_to_end(&mut buffered) - .map_err(|err| USimpleError::new(1, format_read_error(err.kind())))?; + .map_err(|err| USimpleError::new(1, format_read_error(&err)))?; if config.decode { fast_decode::fast_decode_buffer( buffered, @@ -556,7 +556,7 @@ pub mod fast_encode { loop { let read_buffer = input .fill_buf() - .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; + .map_err(|err| USimpleError::new(1, super::format_read_error(&err)))?; if read_buffer.is_empty() { break; } @@ -823,7 +823,7 @@ pub mod fast_decode { loop { let read_buffer = input .fill_buf() - .map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?; + .map_err(|err| USimpleError::new(1, super::format_read_error(&err)))?; let read_len = read_buffer.len(); if read_len == 0 { break; @@ -919,23 +919,8 @@ pub mod fast_decode { } } -fn format_read_error(kind: ErrorKind) -> String { - let kind_string = kind.to_string(); - - // e.g. "is a directory" -> "Is a directory" - let mut kind_string_capitalized = String::with_capacity(kind_string.len()); - - for (index, ch) in kind_string.char_indices() { - if index == 0 { - for cha in ch.to_uppercase() { - kind_string_capitalized.push(cha); - } - } else { - kind_string_capitalized.push(ch); - } - } - - translate!("base-common-read-error", "error" => kind_string_capitalized) +fn format_read_error(error: &io::Error) -> String { + translate!("base-common-read-error", "error" => strip_errno(error)) } /// Determines if the input buffer contains any padding ('=') ignoring trailing whitespace. @@ -944,7 +929,7 @@ fn read_and_has_padding(input: &mut R) -> UResult<(bool, Vec Self; + + fn with_length(self) -> Self; + + fn with_check_and_opts(self) -> Self; + + fn with_binary(self) -> Self; + + fn with_text(self, is_default: bool) -> Self; + + fn with_tag(self, is_default: bool) -> Self; + + fn with_untagged(self) -> Self; + + fn with_raw(self) -> Self; + + fn with_base64(self) -> Self; + + fn with_zero(self) -> Self; + + fn with_debug(self) -> Self; +} + +impl ChecksumCommand for Command { + fn with_algo(self) -> Self { + self.arg( + Arg::new(options::ALGORITHM) + .long(options::ALGORITHM) + .short('a') + .help(translate!("ck-common-help-algorithm")) + .value_name("ALGORITHM") + .value_parser(SUPPORTED_ALGORITHMS), + ) + } + + fn with_length(self) -> Self { + self.arg( + Arg::new(options::LENGTH) + .long(options::LENGTH) + .short('l') + .help(translate!("ck-common-help-length")) + .action(ArgAction::Set), + ) + } + + fn with_check_and_opts(self) -> Self { + self.arg( + Arg::new(options::CHECK) + .short('c') + .long(options::CHECK) + .help(translate!("ck-common-help-check")) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(options::WARN) + .short('w') + .long("warn") + .help(translate!("ck-common-help-warn")) + .action(ArgAction::SetTrue) + .overrides_with_all([options::STATUS, options::QUIET]), + ) + .arg( + Arg::new(options::STATUS) + .long("status") + .help(translate!("ck-common-help-status")) + .action(ArgAction::SetTrue) + .overrides_with_all([options::WARN, options::QUIET]), + ) + .arg( + Arg::new(options::QUIET) + .long(options::QUIET) + .help(translate!("ck-common-help-quiet")) + .action(ArgAction::SetTrue) + .overrides_with_all([options::STATUS, options::WARN]), + ) + .arg( + Arg::new(options::IGNORE_MISSING) + .long(options::IGNORE_MISSING) + .help(translate!("ck-common-help-ignore-missing")) + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new(options::STRICT) + .long(options::STRICT) + .help(translate!("ck-common-help-strict")) + .action(ArgAction::SetTrue), + ) + } + + fn with_binary(self) -> Self { + self.arg( + Arg::new(options::BINARY) + .long(options::BINARY) + .short('b') + .hide(true) + .action(ArgAction::SetTrue), + ) + } + + fn with_text(self, is_default: bool) -> Self { + let mut arg = Arg::new(options::TEXT) + .long(options::TEXT) + .short('t') + .action(ArgAction::SetTrue); + + arg = if is_default { + arg.help(translate!("ck-common-help-text")) + } else { + arg.hide(true) + }; + + self.arg(arg) + } + + fn with_tag(self, default: bool) -> Self { + let mut arg = Arg::new(options::TAG) + .long(options::TAG) + .action(ArgAction::SetTrue); + + arg = if default { + arg.help(translate!("ck-common-help-tag-default")) + } else { + arg.help(translate!("ck-common-help-tag")) + }; + + self.arg(arg) + } + + fn with_untagged(self) -> Self { + self.arg( + Arg::new(options::UNTAGGED) + .long(options::UNTAGGED) + .help(translate!("ck-common-help-untagged")) + .action(ArgAction::SetTrue), + ) + } + + fn with_raw(self) -> Self { + self.arg( + Arg::new(options::RAW) + .long(options::RAW) + .help(translate!("ck-common-help-raw")) + .action(ArgAction::SetTrue), + ) + } + + fn with_base64(self) -> Self { + self.arg( + Arg::new(options::BASE64) + .long(options::BASE64) + .help(translate!("ck-common-help-base64")) + .action(ArgAction::SetTrue) + // Even though this could easily just override an earlier '--raw', + // GNU cksum does not permit these flags to be combined: + .conflicts_with(options::RAW), + ) + } + + fn with_zero(self) -> Self { + self.arg( + Arg::new(options::ZERO) + .long(options::ZERO) + .short('z') + .help(translate!("ck-common-help-zero")) + .action(ArgAction::SetTrue), + ) + } + + fn with_debug(self) -> Self { + self.arg( + Arg::new(options::DEBUG) + .long(options::DEBUG) + .help(translate!("ck-common-help-debug")) + .action(ArgAction::SetTrue), + ) + } +} diff --git a/src/uu/checksum_common/src/lib.rs b/src/uu/checksum_common/src/lib.rs new file mode 100644 index 00000000000..1d5a2726520 --- /dev/null +++ b/src/uu/checksum_common/src/lib.rs @@ -0,0 +1,207 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore (ToDO) algo + +use std::ffi::OsString; + +use clap::builder::ValueParser; +use clap::{Arg, ArgAction, ArgMatches, Command, ValueHint}; + +use uucore::checksum::compute::{ + ChecksumComputeOptions, OutputFormat, perform_checksum_computation, +}; +use uucore::checksum::validate::{self, ChecksumValidateOptions, ChecksumVerbose}; +use uucore::checksum::{AlgoKind, ChecksumError, SizedAlgoKind}; +use uucore::error::UResult; +use uucore::line_ending::LineEnding; +use uucore::{crate_version, format_usage, localized_help_template, util_name}; + +mod cli; +pub use cli::ChecksumCommand; +pub use cli::options; + +/// Expands to generate the right `uumain` and `uu_app` functions +/// for standalone checksum binaries. +/// +/// Example: +/// ``` +/// use uu_checksum_common::declare_standalone; +/// use uucore::checksum::AlgoKind; +/// +/// declare_standalone!("sha512sum", AlgoKind::Sha512); +/// ``` +#[macro_export] +macro_rules! declare_standalone { + ($bin:literal, $kind:expr) => { + #[::uucore::main] + pub fn uumain(args: impl ::uucore::Args) -> ::uucore::error::UResult<()> { + ::uu_checksum_common::standalone_main($kind, uu_app(), args) + } + + #[inline] + pub fn uu_app() -> ::clap::Command { + ::uu_checksum_common::standalone_checksum_app( + ::uucore::translate!(concat!($bin, "-about")), + ::uucore::translate!(concat!($bin, "-usage")), + ) + } + }; +} + +/// Entrypoint for standalone checksums accepting the `--length` argument +/// +/// Note: Ideally, we wouldn't require a `cmd` to be passed to the function, +/// but for localization purposes, the standalone binaries must declare their +/// command (with about and usage) themselves, otherwise calling --help from +/// the multicall binary results in an unformatted output. +pub fn standalone_with_length_main( + algo: AlgoKind, + cmd: Command, + args: impl uucore::Args, + validate_len: fn(&str) -> UResult>, +) -> UResult<()> { + let matches = uucore::clap_localization::handle_clap_result(cmd, args)?; + let algo = Some(algo); + + let length = matches + .get_one::(options::LENGTH) + .map(String::as_str) + .map(validate_len) + .transpose()? + .flatten(); + + let format = OutputFormat::from_standalone(std::env::args_os()); + + checksum_main(algo, length, matches, format?) +} + +/// Entrypoint for standalone checksums *NOT* accepting the `--length` argument +pub fn standalone_main(algo: AlgoKind, cmd: Command, args: impl uucore::Args) -> UResult<()> { + let matches = uucore::clap_localization::handle_clap_result(cmd, args)?; + let algo = Some(algo); + + let format = OutputFormat::from_standalone(std::env::args_os()); + + checksum_main(algo, None, matches, format?) +} + +/// Base command processing for all the checksum executables. +pub fn default_checksum_app(about: String, usage: String) -> Command { + Command::new(util_name()) + .version(crate_version!()) + .help_template(localized_help_template(util_name())) + .about(about) + .override_usage(format_usage(&usage)) + .infer_long_args(true) + .args_override_self(true) + .arg( + Arg::new(options::FILE) + .hide(true) + .action(ArgAction::Append) + .value_parser(ValueParser::os_string()) + .default_value("-") + .hide_default_value(true) + .value_hint(ValueHint::FilePath), + ) +} + +/// Command processing for standalone checksums accepting the `--length` +/// argument +pub fn standalone_checksum_app_with_length(about: String, usage: String) -> Command { + default_checksum_app(about, usage) + .with_binary() + .with_check_and_opts() + .with_length() + .with_tag(false) + .with_text(true) + .with_zero() +} + +/// Command processing for standalone checksums *NOT* accepting the `--length` +/// argument +pub fn standalone_checksum_app(about: String, usage: String) -> Command { + default_checksum_app(about, usage) + .with_binary() + .with_check_and_opts() + .with_tag(false) + .with_text(true) + .with_zero() +} + +/// This is the common entrypoint to all checksum utils. Performs some +/// validation on arguments and proceeds in computing or checking mode. +pub fn checksum_main( + algo: Option, + length: Option, + matches: ArgMatches, + output_format: OutputFormat, +) -> UResult<()> { + let check = matches.get_flag("check"); + + let check_flag = |flag| match (check, matches.get_flag(flag)) { + (_, false) => Ok(false), + (true, true) => Ok(true), + (false, true) => Err(ChecksumError::CheckOnlyFlag(flag.into())), + }; + + // Each of the following flags are only expected in --check mode. + // If we encounter them otherwise, end with an error. + let ignore_missing = check_flag("ignore-missing")?; + let warn = check_flag("warn")?; + let quiet = check_flag("quiet")?; + let strict = check_flag("strict")?; + let status = check_flag("status")?; + + // clap provides the default value -. So we unwrap() safety. + let files = matches + .get_many::(options::FILE) + .unwrap() + .map(|s| s.as_os_str()); + + if check { + // cksum does not support '--check'ing legacy algorithms + if algo.is_some_and(AlgoKind::is_legacy) { + return Err(ChecksumError::AlgorithmNotSupportedWithCheck.into()); + } + + let text_flag = matches.get_flag(options::TEXT); + let binary_flag = matches.get_flag(options::BINARY); + let tag = matches.get_flag(options::TAG); + + if tag || binary_flag || text_flag { + return Err(ChecksumError::BinaryTextConflict.into()); + } + + // Execute the checksum validation based on the presence of files or the use of stdin + + let verbose = ChecksumVerbose::new(status, quiet, warn); + let opts = ChecksumValidateOptions { + ignore_missing, + strict, + verbose, + }; + + return validate::perform_checksum_validation(files, algo, length, opts); + } + + // Not --check + + // Set the default algorithm to CRC when not '--check'ing. + let algo_kind = algo.unwrap_or(AlgoKind::Crc); + + let algo = SizedAlgoKind::from_unsized(algo_kind, length)?; + let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO)); + + let opts = ChecksumComputeOptions { + algo_kind: algo, + output_format, + line_ending, + }; + + perform_checksum_computation(opts, files)?; + + Ok(()) +} diff --git a/src/uu/cksum/Cargo.toml b/src/uu/cksum/Cargo.toml index 8403972731a..5f509e31335 100644 --- a/src/uu/cksum/Cargo.toml +++ b/src/uu/cksum/Cargo.toml @@ -25,11 +25,11 @@ uucore = { workspace = true, features = [ "sum", "hardware", ] } +uu_checksum_common = { workspace = true } fluent = { workspace = true } [dev-dependencies] divan = { workspace = true } -tempfile = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } [[bin]] diff --git a/src/uu/cksum/locales/en-US.ftl b/src/uu/cksum/locales/en-US.ftl index 834cd77b0ef..aece6fc5b72 100644 --- a/src/uu/cksum/locales/en-US.ftl +++ b/src/uu/cksum/locales/en-US.ftl @@ -12,19 +12,3 @@ cksum-after-help = DIGEST determines the digest algorithm and default output for - sha3: (only available through cksum) - blake2b: (equivalent to b2sum) - sm3: (only available through cksum) - -# Help messages -cksum-help-algorithm = select the digest type to use. See DIGEST below -cksum-help-untagged = create a reversed style checksum, without digest type -cksum-help-tag = create a BSD style checksum, undo --untagged (default) -cksum-help-length = digest length in bits; must not exceed the max for the blake2 algorithm and must be a multiple of 8 -cksum-help-raw = emit a raw binary digest, not hexadecimal -cksum-help-strict = exit non-zero for improperly formatted checksum lines -cksum-help-check = read hashsums from the FILEs and check them -cksum-help-base64 = emit a base64 digest, not hexadecimal -cksum-help-warn = warn about improperly formatted checksum lines -cksum-help-status = don't output anything, status code shows success -cksum-help-quiet = don't print OK for each successfully verified file -cksum-help-ignore-missing = don't fail or report status for missing files -cksum-help-zero = end each output line with NUL, not newline, and disable file name escaping -cksum-help-debug = print CPU hardware capability detection info used by cksum diff --git a/src/uu/cksum/locales/fr-FR.ftl b/src/uu/cksum/locales/fr-FR.ftl index 01136f606f9..bbc12e59cde 100644 --- a/src/uu/cksum/locales/fr-FR.ftl +++ b/src/uu/cksum/locales/fr-FR.ftl @@ -12,19 +12,3 @@ cksum-after-help = DIGEST détermine l'algorithme de condensé et le format de s - sha3 : (disponible uniquement via cksum) - blake2b : (équivalent à b2sum) - sm3 : (disponible uniquement via cksum) - -# Messages d'aide -cksum-help-algorithm = sélectionner le type de condensé à utiliser. Voir DIGEST ci-dessous -cksum-help-untagged = créer une somme de contrôle de style inversé, sans type de condensé -cksum-help-tag = créer une somme de contrôle de style BSD, annuler --untagged (par défaut) -cksum-help-length = longueur du condensé en bits ; ne doit pas dépasser le maximum pour l'algorithme blake2 et doit être un multiple de 8 -cksum-help-raw = émettre un condensé binaire brut, pas hexadécimal -cksum-help-strict = sortir avec un code non-zéro pour les lignes de somme de contrôle mal formatées -cksum-help-check = lire les sommes de hachage des FICHIERs et les vérifier -cksum-help-base64 = émettre un condensé base64, pas hexadécimal -cksum-help-warn = avertir des lignes de somme de contrôle mal formatées -cksum-help-status = ne rien afficher, le code de statut indique le succès -cksum-help-quiet = ne pas afficher OK pour chaque fichier vérifié avec succès -cksum-help-ignore-missing = ne pas échouer ou signaler le statut pour les fichiers manquants -cksum-help-zero = terminer chaque ligne de sortie avec NUL, pas un saut de ligne, et désactiver l'échappement des noms de fichiers -cksum-help-debug = afficher les informations de débogage sur la détection de la prise en charge matérielle du processeur diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 7d3228407ef..0f9fdee5fa6 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -5,23 +5,18 @@ // spell-checker:ignore (ToDO) fname, algo, bitlen -use clap::builder::ValueParser; -use clap::{Arg, ArgAction, Command}; -use std::ffi::{OsStr, OsString}; -use uucore::checksum::compute::{ - ChecksumComputeOptions, figure_out_output_format, perform_checksum_computation, -}; -use uucore::checksum::validate::{ - ChecksumValidateOptions, ChecksumVerbose, perform_checksum_validation, -}; +use std::ffi::OsStr; + +use clap::Command; +use uu_checksum_common::{ChecksumCommand, checksum_main, default_checksum_app, options}; + +use uucore::checksum::compute::OutputFormat; use uucore::checksum::{ - AlgoKind, ChecksumError, SUPPORTED_ALGORITHMS, SizedAlgoKind, calculate_blake2b_length_str, - sanitize_sha2_sha3_length_str, + AlgoKind, ChecksumError, calculate_blake2b_length_str, sanitize_sha2_sha3_length_str, }; use uucore::error::UResult; use uucore::hardware::{HasHardwareFeatures as _, SimdPolicy}; -use uucore::line_ending::LineEnding; -use uucore::{format_usage, show_error, translate}; +use uucore::{show_error, translate}; /// Print CPU hardware capability detection information to stderr /// This matches GNU cksum's --debug behavior @@ -47,26 +42,6 @@ fn print_cpu_debug_info() { } } -mod options { - pub const ALGORITHM: &str = "algorithm"; - pub const FILE: &str = "file"; - pub const UNTAGGED: &str = "untagged"; - pub const TAG: &str = "tag"; - pub const LENGTH: &str = "length"; - pub const RAW: &str = "raw"; - pub const BASE64: &str = "base64"; - pub const CHECK: &str = "check"; - pub const STRICT: &str = "strict"; - pub const TEXT: &str = "text"; - pub const BINARY: &str = "binary"; - pub const STATUS: &str = "status"; - pub const WARN: &str = "warn"; - pub const IGNORE_MISSING: &str = "ignore-missing"; - pub const QUIET: &str = "quiet"; - pub const ZERO: &str = "zero"; - pub const DEBUG: &str = "debug"; -} - /// cksum has a bunch of legacy behavior. We handle this in this function to /// make sure they are self contained and "easier" to understand. /// @@ -137,22 +112,6 @@ fn maybe_sanitize_length( pub fn uumain(args: impl uucore::Args) -> UResult<()> { let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?; - let check = matches.get_flag(options::CHECK); - - let check_flag = |flag| match (check, matches.get_flag(flag)) { - (_, false) => Ok(false), - (true, true) => Ok(true), - (false, true) => Err(ChecksumError::CheckOnlyFlag(flag.into())), - }; - - // Each of the following flags are only expected in --check mode. - // If we encounter them otherwise, end with an error. - let ignore_missing = check_flag(options::IGNORE_MISSING)?; - let warn = check_flag(options::WARN)?; - let quiet = check_flag(options::QUIET)?; - let strict = check_flag(options::STRICT)?; - let status = check_flag(options::STATUS)?; - let algo_cli = matches .get_one::(options::ALGORITHM) .map(AlgoKind::from_cksum) @@ -164,200 +123,36 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let length = maybe_sanitize_length(algo_cli, input_length)?; - // clap provides the default value -. So we unwrap() safety. - let files = matches - .get_many::(options::FILE) - .unwrap() - .map(|s| s.as_os_str()); - - if check { - // cksum does not support '--check'ing legacy algorithms - if algo_cli.is_some_and(AlgoKind::is_legacy) { - return Err(ChecksumError::AlgorithmNotSupportedWithCheck.into()); - } - - let text_flag = matches.get_flag(options::TEXT); - let binary_flag = matches.get_flag(options::BINARY); - let tag = matches.get_flag(options::TAG); - - if tag || binary_flag || text_flag { - return Err(ChecksumError::BinaryTextConflict.into()); - } - - // Execute the checksum validation based on the presence of files or the use of stdin - - let verbose = ChecksumVerbose::new(status, quiet, warn); - let opts = ChecksumValidateOptions { - ignore_missing, - strict, - verbose, - }; - - return perform_checksum_validation(files, algo_cli, length, opts); - } + let (tag, binary) = handle_tag_text_binary_flags(std::env::args_os())?; - // Not --check + let output_format = OutputFormat::from_cksum( + algo_cli.unwrap_or(AlgoKind::Crc), + tag, + binary, + /* raw */ matches.get_flag(options::RAW), + /* base64 */ matches.get_flag(options::BASE64), + ); // Print hardware debug info if requested if matches.get_flag(options::DEBUG) { print_cpu_debug_info(); } - // Set the default algorithm to CRC when not '--check'ing. - let algo_kind = algo_cli.unwrap_or(AlgoKind::Crc); - - let (tag, binary) = handle_tag_text_binary_flags(std::env::args_os())?; - - let algo = SizedAlgoKind::from_unsized(algo_kind, length)?; - let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO)); - - let opts = ChecksumComputeOptions { - algo_kind: algo, - output_format: figure_out_output_format( - algo, - tag, - binary, - matches.get_flag(options::RAW), - matches.get_flag(options::BASE64), - ), - line_ending, - }; - - perform_checksum_computation(opts, files)?; - - Ok(()) + checksum_main(algo_cli, length, matches, output_format) } pub fn uu_app() -> Command { - Command::new(uucore::util_name()) - .version(uucore::crate_version!()) - .help_template(uucore::localized_help_template(uucore::util_name())) - .about(translate!("cksum-about")) - .override_usage(format_usage(&translate!("cksum-usage"))) - .infer_long_args(true) - .args_override_self(true) - .arg( - Arg::new(options::FILE) - .hide(true) - .action(ArgAction::Append) - .value_parser(ValueParser::os_string()) - .default_value("-") - .hide_default_value(true) - .value_hint(clap::ValueHint::FilePath), - ) - .arg( - Arg::new(options::ALGORITHM) - .long(options::ALGORITHM) - .short('a') - .help(translate!("cksum-help-algorithm")) - .value_name("ALGORITHM") - .value_parser(SUPPORTED_ALGORITHMS), - ) - .arg( - Arg::new(options::UNTAGGED) - .long(options::UNTAGGED) - .help(translate!("cksum-help-untagged")) - .action(ArgAction::SetTrue) - .overrides_with(options::TAG), - ) - .arg( - Arg::new(options::TAG) - .long(options::TAG) - .help(translate!("cksum-help-tag")) - .action(ArgAction::SetTrue) - .overrides_with(options::UNTAGGED), - ) - .arg( - Arg::new(options::LENGTH) - .long(options::LENGTH) - .short('l') - .help(translate!("cksum-help-length")) - .action(ArgAction::Set), - ) - .arg( - Arg::new(options::RAW) - .long(options::RAW) - .help(translate!("cksum-help-raw")) - .action(ArgAction::SetTrue), - ) - .arg( - Arg::new(options::STRICT) - .long(options::STRICT) - .help(translate!("cksum-help-strict")) - .action(ArgAction::SetTrue), - ) - .arg( - Arg::new(options::CHECK) - .short('c') - .long(options::CHECK) - .help(translate!("cksum-help-check")) - .action(ArgAction::SetTrue), - ) - .arg( - Arg::new(options::BASE64) - .long(options::BASE64) - .help(translate!("cksum-help-base64")) - .action(ArgAction::SetTrue) - // Even though this could easily just override an earlier '--raw', - // GNU cksum does not permit these flags to be combined: - .conflicts_with(options::RAW), - ) - .arg( - Arg::new(options::TEXT) - .long(options::TEXT) - .short('t') - .hide(true) - .overrides_with(options::BINARY) - .action(ArgAction::SetTrue), - ) - .arg( - Arg::new(options::BINARY) - .long(options::BINARY) - .short('b') - .hide(true) - .overrides_with(options::TEXT) - .action(ArgAction::SetTrue), - ) - .arg( - Arg::new(options::WARN) - .short('w') - .long("warn") - .help(translate!("cksum-help-warn")) - .action(ArgAction::SetTrue) - .overrides_with_all([options::STATUS, options::QUIET]), - ) - .arg( - Arg::new(options::STATUS) - .long("status") - .help(translate!("cksum-help-status")) - .action(ArgAction::SetTrue) - .overrides_with_all([options::WARN, options::QUIET]), - ) - .arg( - Arg::new(options::QUIET) - .long(options::QUIET) - .help(translate!("cksum-help-quiet")) - .action(ArgAction::SetTrue) - .overrides_with_all([options::WARN, options::STATUS]), - ) - .arg( - Arg::new(options::IGNORE_MISSING) - .long(options::IGNORE_MISSING) - .help(translate!("cksum-help-ignore-missing")) - .action(ArgAction::SetTrue), - ) - .arg( - Arg::new(options::ZERO) - .long(options::ZERO) - .short('z') - .help(translate!("cksum-help-zero")) - .action(ArgAction::SetTrue), - ) - .arg( - Arg::new(options::DEBUG) - .long(options::DEBUG) - .help(translate!("cksum-help-debug")) - .action(ArgAction::SetTrue), - ) + default_checksum_app(translate!("cksum-about"), translate!("cksum-usage")) + .with_algo() + .with_untagged() + .with_tag(true) + .with_length() + .with_raw() + .with_check_and_opts() + .with_base64() + .with_text(false) + .with_binary() + .with_zero() + .with_debug() .after_help(translate!("cksum-after-help")) } diff --git a/src/uu/cp/Cargo.toml b/src/uu/cp/Cargo.toml index 8a2391e55cb..592b9cba9a3 100644 --- a/src/uu/cp/Cargo.toml +++ b/src/uu/cp/Cargo.toml @@ -39,7 +39,6 @@ thiserror = { workspace = true } fluent = { workspace = true } [target.'cfg(unix)'.dependencies] -xattr = { workspace = true } exacl = { workspace = true, optional = true } [[bin]] diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index 22134f0d670..62b8b7a7bcf 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -1790,7 +1790,7 @@ pub(crate) fn copy_attributes( Ok(()) })?; - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] handle_preserve(&attributes.context, || -> CopyResult<()> { // Get the source context and apply it to the destination if let Ok(context) = selinux::SecurityContext::of_path(source, false, false) { @@ -2586,7 +2586,7 @@ fn copy_file( copy_attributes(source, dest, &options.attributes)?; } - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] if options.set_selinux_context && uucore::selinux::is_selinux_enabled() { // Set the given selinux permissions on the copied file. if let Err(e) = diff --git a/src/uu/cut/Cargo.toml b/src/uu/cut/Cargo.toml index 0133180f070..f7ea5b203c0 100644 --- a/src/uu/cut/Cargo.toml +++ b/src/uu/cut/Cargo.toml @@ -26,7 +26,6 @@ fluent = { workspace = true } [dev-dependencies] divan = { workspace = true } -tempfile = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } [[bin]] diff --git a/src/uu/date/Cargo.toml b/src/uu/date/Cargo.toml index 9bff97696f0..8820d96b98c 100644 --- a/src/uu/date/Cargo.toml +++ b/src/uu/date/Cargo.toml @@ -18,16 +18,22 @@ workspace = true [lib] path = "src/date.rs" +[features] +default = ["i18n-datetime"] +i18n-datetime = ["uucore/i18n-datetime", "dep:icu_calendar", "dep:icu_locale"] + [dependencies] clap = { workspace = true } fluent = { workspace = true } +icu_calendar = { workspace = true, optional = true } +icu_locale = { workspace = true, optional = true } jiff = { workspace = true, features = [ "tzdb-bundle-platform", "tzdb-zoneinfo", "tzdb-concatenated", ] } parse_datetime = { workspace = true } -uucore = { workspace = true, features = ["parser"] } +uucore = { workspace = true, features = ["parser", "i18n-datetime"] } [target.'cfg(unix)'.dependencies] nix = { workspace = true, features = ["time"] } diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index b63b04bf42c..f82fe1c38bf 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore strtime ; (format) DATEFILE MMDDhhmm ; (vars) datetime datetimes getres AWST ACST AEST +// spell-checker:ignore strtime ; (format) DATEFILE MMDDhhmm ; (vars) datetime datetimes getres AWST ACST AEST foobarbaz mod locale; @@ -11,6 +11,7 @@ use clap::{Arg, ArgAction, Command}; use jiff::fmt::strtime::{self, BrokenDownTime, Config, PosixCustom}; use jiff::tz::{TimeZone, TimeZoneDatabase}; use jiff::{Timestamp, Zoned}; +use std::borrow::Cow; use std::collections::HashMap; use std::fs::File; use std::io::{BufRead, BufReader, BufWriter, Write}; @@ -19,6 +20,11 @@ use std::sync::OnceLock; use uucore::display::Quotable; use uucore::error::FromIo; use uucore::error::{UResult, USimpleError}; +#[cfg(feature = "i18n-datetime")] +use uucore::i18n::datetime::{ + get_era_year, get_localized_day_name, get_localized_month_name, get_time_locale, + should_use_icu_locale, +}; use uucore::translate; use uucore::{format_usage, show}; #[cfg(windows)] @@ -130,6 +136,42 @@ enum DayDelta { Next, } +/// Strip parenthesized comments from a date string. +/// +/// GNU date removes balanced parentheses and their content, treating them as comments. +/// If parentheses are unbalanced, everything from the unmatched '(' onwards is ignored. +/// +/// Examples: +/// - "2026(comment)-01-05" -> "2026-01-05" +/// - "1(ignore comment to eol" -> "1" +/// - "(" -> "" +/// - "((foo)2026-01-05)" -> "" +fn strip_parenthesized_comments(input: &str) -> Cow<'_, str> { + if !input.contains('(') { + return Cow::Borrowed(input); + } + + let mut result = String::with_capacity(input.len()); + let mut depth = 0; + + for c in input.chars() { + match c { + '(' => { + depth += 1; + } + ')' if depth > 0 => { + depth -= 1; + } + _ if depth == 0 => { + result.push(c); + } + _ => {} + } + } + + Cow::Owned(result) +} + /// Parse military timezone with optional hour offset. /// Pattern: single letter (a-z except j) optionally followed by 1-2 digits. /// Returns Some(total_hours_in_utc) or None if pattern doesn't match. @@ -286,7 +328,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Iterate over all dates - whether it's a single date or a file. let dates: Box> = match settings.date_source { DateSource::Human(ref input) => { + // GNU compatibility (Comments in parentheses) + let input = strip_parenthesized_comments(input); let input = input.trim(); + // GNU compatibility (Empty string): // An empty string (or whitespace-only) should be treated as midnight today. let is_empty_or_whitespace = input.is_empty(); @@ -434,20 +479,18 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let config = Config::new().custom(PosixCustom::new()).lenient(true); for date in dates { match date { - Ok(date) => { - match BrokenDownTime::from(&date).to_string_with_config(&config, format_string) { - Ok(s) => writeln!(stdout, "{s}").map_err(|e| { - USimpleError::new(1, translate!("date-error-write", "error" => e)) - })?, - Err(e) => { - let _ = stdout.flush(); - return Err(USimpleError::new( - 1, - translate!("date-error-invalid-format", "format" => format_string, "error" => e), - )); - } + Ok(date) => match format_date_with_locale_aware_months(&date, format_string, &config) { + Ok(s) => writeln!(stdout, "{s}").map_err(|e| { + USimpleError::new(1, translate!("date-error-write", "error" => e)) + })?, + Err(e) => { + let _ = stdout.flush(); + return Err(USimpleError::new( + 1, + translate!("date-error-invalid-format", "format" => format_string, "error" => e), + )); } - } + }, Err((input, _err)) => { let _ = stdout.flush(); show!(USimpleError::new( @@ -572,6 +615,119 @@ pub fn uu_app() -> Command { .arg(Arg::new(OPT_FORMAT).num_args(0..).trailing_var_arg(true)) } +fn format_date_with_locale_aware_months( + date: &Zoned, + format_string: &str, + config: &Config, +) -> Result { + // Only use ICU for non-English locales and when format string contains month, day, or era year specifiers + if (format_string.contains("%B") + || format_string.contains("%b") + || format_string.contains("%A") + || format_string.contains("%a") + || format_string.contains("%Y") + || format_string.contains("%Ey")) + && should_use_icu_locale() + { + let broken_down = BrokenDownTime::from(date); + // Get localized month names if needed + let (full_month, abbrev_month) = + if format_string.contains("%B") || format_string.contains("%b") { + if let Some(month_val) = broken_down.month() { + let month_u8 = if (1..=12).contains(&month_val) { + month_val as u8 + } else { + 1 // fallback to January for invalid values + }; + ( + get_localized_month_name(month_u8, true), + get_localized_month_name(month_u8, false), + ) + } else { + (String::new(), String::new()) + } + } else { + (String::new(), String::new()) + }; + + // Get localized day names if needed + let (full_day, abbrev_day) = if format_string.contains("%A") || format_string.contains("%a") + { + if let (Some(year), Some(month), Some(day)) = + (broken_down.year(), broken_down.month(), broken_down.day()) + { + ( + get_localized_day_name(year.into(), month as u8, day as u8, true), + get_localized_day_name(year.into(), month as u8, day as u8, false), + ) + } else { + (String::new(), String::new()) + } + } else { + (String::new(), String::new()) + }; + + // Get era year if needed + let era_year = if format_string.contains("%Y") || format_string.contains("%Ey") { + if let (Some(year), Some(month), Some(day)) = + (broken_down.year(), broken_down.month(), broken_down.day()) + { + let (locale, _encoding) = get_time_locale(); + get_era_year(year.into(), month as u8, day as u8, locale) + } else { + None + } + } else { + None + }; + + // Replace format specifiers with NULL-byte placeholders for successful ICU translations only + // Use NULL bytes to avoid collision with user format strings + let mut temp_format = format_string.to_string(); + if !full_month.is_empty() { + temp_format = temp_format.replace("%B", "\0FULL_MONTH\0"); + } + if !abbrev_month.is_empty() { + temp_format = temp_format.replace("%b", "\0ABBREV_MONTH\0"); + } + if !full_day.is_empty() { + temp_format = temp_format.replace("%A", "\0FULL_DAY\0"); + } + if !abbrev_day.is_empty() { + temp_format = temp_format.replace("%a", "\0ABBREV_DAY\0"); + } + if era_year.is_some() { + temp_format = temp_format.replace("%Y", "\0ERA_YEAR\0"); + } + + // Format with the temporary string + let temp_result = broken_down.to_string_with_config(config, &temp_format)?; + + // Replace NULL-byte placeholders with localized names + let mut final_result = temp_result; + if !full_month.is_empty() { + final_result = final_result.replace("\0FULL_MONTH\0", &full_month); + } + if !abbrev_month.is_empty() { + final_result = final_result.replace("\0ABBREV_MONTH\0", &abbrev_month); + } + if !full_day.is_empty() { + final_result = final_result.replace("\0FULL_DAY\0", &full_day); + } + if !abbrev_day.is_empty() { + final_result = final_result.replace("\0ABBREV_DAY\0", &abbrev_day); + } + if let Some(era_year_val) = era_year { + final_result = final_result.replace("\0ERA_YEAR\0", &era_year_val.to_string()); + } + + return Ok(final_result); + } + + // Fallback to regular formatting + BrokenDownTime::from(date).to_string_with_config(config, format_string) +} + /// Return the appropriate format string for the given settings. fn make_format_string(settings: &Settings) -> &str { match settings.format { @@ -887,4 +1043,38 @@ mod tests { assert_eq!(parse_military_timezone_with_offset("m999"), None); // Too long assert_eq!(parse_military_timezone_with_offset("9m"), None); // Starts with digit } + + #[test] + fn test_strip_parenthesized_comments() { + assert_eq!(strip_parenthesized_comments("hello"), "hello"); + assert_eq!(strip_parenthesized_comments("2026-01-05"), "2026-01-05"); + assert_eq!(strip_parenthesized_comments("("), ""); + assert_eq!(strip_parenthesized_comments("1(comment"), "1"); + assert_eq!( + strip_parenthesized_comments("2026-01-05(this is a comment"), + "2026-01-05" + ); + assert_eq!( + strip_parenthesized_comments("2026(comment)-01-05"), + "2026-01-05" + ); + assert_eq!(strip_parenthesized_comments("()"), ""); + assert_eq!(strip_parenthesized_comments("((foo)2026-01-05)"), ""); + + // These cases test the balanced parentheses removal feature + // which extends beyond what GNU date strictly supports + assert_eq!(strip_parenthesized_comments("a(b)c"), "ac"); + assert_eq!(strip_parenthesized_comments("a(b)c(d)e"), "ace"); + assert_eq!(strip_parenthesized_comments("(a)(b)"), ""); + + // When parentheses are unmatched, processing stops at the unmatched opening paren + // In this case "a(b)c(d", the (b) is balanced but (d is unmatched + // We process "a(b)c" and stop at the unmatched "(d" + assert_eq!(strip_parenthesized_comments("a(b)c(d"), "ac"); + + // Additional edge cases for nested and complex parentheses + assert_eq!(strip_parenthesized_comments("a(b(c)d)e"), "ae"); // Nested balanced + assert_eq!(strip_parenthesized_comments("a(b(c)d"), "a"); // Nested unbalanced + assert_eq!(strip_parenthesized_comments("a(b)c(d)e(f"), "ace"); // Multiple groups, last unmatched + } } diff --git a/src/uu/dd/Cargo.toml b/src/uu/dd/Cargo.toml index f7941f5d3d2..5d5819c302d 100644 --- a/src/uu/dd/Cargo.toml +++ b/src/uu/dd/Cargo.toml @@ -32,8 +32,8 @@ thiserror = { workspace = true } fluent = { workspace = true } [target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies] -signal-hook = { workspace = true } nix = { workspace = true, features = ["fs"] } +signal-hook = { workspace = true } [[bin]] name = "dd" diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 45fdf6f3d1f..fc1adc537fc 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -5,9 +5,6 @@ // spell-checker:ignore fname, ftype, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized fadvise Fadvise FADV DONTNEED ESPIPE bufferedoutput, SETFL -#[cfg(unix)] -uucore::init_startup_state_capture!(); - mod blocks; mod bufferedoutput; mod conversion_tables; diff --git a/src/uu/df/src/table.rs b/src/uu/df/src/table.rs index a50861758fd..f4d83c3aa03 100644 --- a/src/uu/df/src/table.rs +++ b/src/uu/df/src/table.rs @@ -18,7 +18,7 @@ use uucore::translate; use std::ffi::OsString; use std::iter; -use std::ops::AddAssign; +use std::ops::{Add, AddAssign}; /// A row in the filesystem usage data table. /// @@ -38,13 +38,13 @@ pub(crate) struct Row { fs_mount: OsString, /// Total number of bytes in the filesystem regardless of whether they are used. - bytes: u64, + bytes: BytesCell, /// Number of used bytes. - bytes_used: u64, + bytes_used: BytesCell, /// Number of available bytes. - bytes_avail: u64, + bytes_avail: BytesCell, /// Percentage of bytes that are used, given as a float between 0 and 1. /// @@ -81,9 +81,9 @@ impl Row { fs_device: source.into(), fs_type: "-".into(), fs_mount: "-".into(), - bytes: 0, - bytes_used: 0, - bytes_avail: 0, + bytes: BytesCell::default(), + bytes_used: BytesCell::default(), + bytes_avail: BytesCell::default(), bytes_usage: None, #[cfg(target_os = "macos")] bytes_capacity: None, @@ -114,13 +114,13 @@ impl AddAssign for Row { bytes, bytes_used, bytes_avail, - bytes_usage: if bytes == 0 { + bytes_usage: if bytes.bytes == 0 { None } else { // We use "(bytes_used + bytes_avail)" instead of "bytes" because on some filesystems (e.g. // ext4) "bytes" also includes reserved blocks we ignore for the usage calculation. // https://www.gnu.org/software/coreutils/faq/coreutils-faq.html#df-Size-and-Used-and-Available-do-not-add-up - Some(bytes_used as f64 / (bytes_used + bytes_avail) as f64) + Some(bytes_used.bytes as f64 / (bytes_used.bytes + bytes_avail.bytes) as f64) }, // TODO Figure out how to compute this. #[cfg(target_os = "macos")] @@ -137,8 +137,8 @@ impl AddAssign for Row { } } -impl From for Row { - fn from(fs: Filesystem) -> Self { +impl Row { + fn from_filesystem(fs: Filesystem, row_block_size: &BlockSize) -> Self { let MountInfo { dev_name, fs_type, @@ -163,9 +163,9 @@ impl From for Row { fs_device: dev_name, fs_type, fs_mount: mount_dir, - bytes: blocksize * blocks, - bytes_used: blocksize * bused, - bytes_avail: blocksize * bavail, + bytes: BytesCell::new(blocks * blocksize, row_block_size), + bytes_used: BytesCell::new(bused * blocksize, row_block_size), + bytes_avail: BytesCell::new(bavail * blocksize, row_block_size), bytes_usage: if blocks == 0 { None } else { @@ -192,6 +192,48 @@ impl From for Row { } } +#[derive(Debug, Copy, Clone)] +struct BytesCell { + bytes: u64, + scaled: u64, +} + +/// A bytes column in the filesystem usage data table. +/// +/// This is used to keep track of the scaled values to properly compute +/// the total values. +impl Default for BytesCell { + fn default() -> Self { + Self { + bytes: 0, + scaled: 0, + } + } +} + +impl BytesCell { + fn new(bytes: u64, block_size: &BlockSize) -> Self { + Self { + bytes, + scaled: { + let BlockSize::Bytes(d) = block_size; + (bytes as f64 / *d as f64).ceil() as u64 + }, + } + } +} + +impl Add for BytesCell { + type Output = Self; + + fn add(self, rhs: Self) -> Self { + Self { + bytes: self.bytes + rhs.bytes, + scaled: self.scaled + rhs.scaled, + } + } +} + /// A `Cell` in the table. We store raw `bytes` as the data (e.g. directory name /// may be non-Unicode). We also record the printed `width` for alignment purpose, /// as it is easier to compute on the original string. @@ -262,12 +304,18 @@ impl<'a> RowFormatter<'a> { /// Get a string giving the scaled version of the input number. /// /// The scaling factor is defined in the `options` field. - fn scaled_bytes(&self, size: u64) -> Cell { + fn scaled_bytes(&self, bytes_column: &BytesCell) -> Cell { + let size = bytes_column.scaled; let s = if let Some(h) = self.options.human_readable { + let size = if self.is_total_row { + let BlockSize::Bytes(d) = self.options.block_size; + d * size + } else { + bytes_column.bytes + }; to_magnitude_and_suffix(size.into(), SuffixType::HumanReadable(h), true) } else { - let BlockSize::Bytes(d) = self.options.block_size; - (size as f64 / d as f64).ceil().to_string() + size.to_string() }; Cell::from_ascii_string(s) } @@ -308,9 +356,9 @@ impl<'a> RowFormatter<'a> { Cell::from_string(&self.row.fs_device) } } - Column::Size => self.scaled_bytes(self.row.bytes), - Column::Used => self.scaled_bytes(self.row.bytes_used), - Column::Avail => self.scaled_bytes(self.row.bytes_avail), + Column::Size => self.scaled_bytes(&self.row.bytes), + Column::Used => self.scaled_bytes(&self.row.bytes_used), + Column::Avail => self.scaled_bytes(&self.row.bytes_avail), Column::Pcent => Self::percentage(self.row.bytes_usage), Column::Target => { @@ -442,10 +490,12 @@ impl Table { // showing all filesystems, then print the data as a row in // the output table. if options.show_all_fs || filesystem.usage.blocks > 0 { - let row = Row::from(filesystem); + let row = Row::from_filesystem(filesystem, &options.block_size); let fmt = RowFormatter::new(&row, options, false); let values = fmt.get_cells(); - total += row; + if options.show_total { + total += row; + } rows.push(values); } @@ -527,7 +577,7 @@ mod tests { use crate::blocks::HumanReadable; use crate::columns::Column; - use crate::table::{Cell, Header, HeaderMode, Row, RowFormatter, Table}; + use crate::table::{BytesCell, Cell, Header, HeaderMode, Row, RowFormatter, Table}; use crate::{BlockSize, Options}; fn init() { @@ -563,9 +613,9 @@ mod tests { fs_type: "my_type".to_string(), fs_mount: "my_mount".into(), - bytes: 100, - bytes_used: 25, - bytes_avail: 75, + bytes: BytesCell::new(100, &BlockSize::Bytes(1)), + bytes_used: BytesCell::new(25, &BlockSize::Bytes(1)), + bytes_avail: BytesCell::new(75, &BlockSize::Bytes(1)), bytes_usage: Some(0.25), #[cfg(target_os = "macos")] @@ -729,9 +779,9 @@ mod tests { fs_device: "my_device".to_string(), fs_mount: "my_mount".into(), - bytes: 100, - bytes_used: 25, - bytes_avail: 75, + bytes: BytesCell::new(100, &BlockSize::Bytes(1)), + bytes_used: BytesCell::new(25, &BlockSize::Bytes(1)), + bytes_avail: BytesCell::new(75, &BlockSize::Bytes(1)), bytes_usage: Some(0.25), ..Default::default() @@ -756,9 +806,9 @@ mod tests { fs_type: "my_type".to_string(), fs_mount: "my_mount".into(), - bytes: 100, - bytes_used: 25, - bytes_avail: 75, + bytes: BytesCell::new(100, &BlockSize::Bytes(1)), + bytes_used: BytesCell::new(25, &BlockSize::Bytes(1)), + bytes_avail: BytesCell::new(75, &BlockSize::Bytes(1)), bytes_usage: Some(0.25), ..Default::default() @@ -805,7 +855,7 @@ mod tests { ..Default::default() }; let row = Row { - bytes: 100, + bytes: BytesCell::new(100, &BlockSize::Bytes(100)), inodes: 10, ..Default::default() }; @@ -826,9 +876,9 @@ mod tests { fs_type: "my_type".to_string(), fs_mount: "my_mount".into(), - bytes: 40000, - bytes_used: 1000, - bytes_avail: 39000, + bytes: BytesCell::new(40000, &BlockSize::default()), + bytes_used: BytesCell::new(1000, &BlockSize::default()), + bytes_avail: BytesCell::new(39000, &BlockSize::default()), bytes_usage: Some(0.025), ..Default::default() @@ -861,9 +911,9 @@ mod tests { fs_type: "my_type".to_string(), fs_mount: "my_mount".into(), - bytes: 4096, - bytes_used: 1024, - bytes_avail: 3072, + bytes: BytesCell::new(4096, &BlockSize::default()), + bytes_used: BytesCell::new(1024, &BlockSize::default()), + bytes_avail: BytesCell::new(3072, &BlockSize::default()), bytes_usage: Some(0.25), ..Default::default() @@ -909,9 +959,9 @@ mod tests { }; let row = Row { - bytes, - bytes_used, - bytes_avail, + bytes: BytesCell::new(bytes, &BlockSize::Bytes(1000)), + bytes_used: BytesCell::new(bytes_used, &BlockSize::Bytes(1000)), + bytes_avail: BytesCell::new(bytes_avail, &BlockSize::Bytes(1000)), ..Default::default() }; RowFormatter::new(&row, &options, false).get_cells() @@ -962,7 +1012,7 @@ mod tests { }, }; - let row = Row::from(d); + let row = Row::from_filesystem(d, &BlockSize::default()); assert_eq!(row.inodes_used, 0); } diff --git a/src/uu/env/Cargo.toml b/src/uu/env/Cargo.toml index 80fe1f41266..b2e4208b982 100644 --- a/src/uu/env/Cargo.toml +++ b/src/uu/env/Cargo.toml @@ -27,7 +27,6 @@ fluent = { workspace = true } [target.'cfg(unix)'.dependencies] nix = { workspace = true, features = ["signal"] } - [[bin]] name = "env" path = "src/main.rs" diff --git a/src/uu/env/src/env.rs b/src/uu/env/src/env.rs index 40f32b7656a..a5dd8a8d748 100644 --- a/src/uu/env/src/env.rs +++ b/src/uu/env/src/env.rs @@ -1095,10 +1095,6 @@ fn list_signal_handling(log: &SignalActionLog) { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - // Rust ignores SIGPIPE (see https://github.com/rust-lang/rust/issues/62569). - // We restore its default action here. - #[cfg(unix)] - let _ = uucore::signals::enable_pipe_errors(); EnvAppData::default().run_env(args) } diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index 294b3bc884c..5acd4fac225 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) ctype cwidth iflag nbytes nspaces nums tspaces uflag Preprocess +// spell-checker:ignore (ToDO) ctype cwidth iflag nbytes nspaces nums tspaces Preprocess use clap::{Arg, ArgAction, ArgMatches, Command}; use std::ffi::OsString; @@ -15,9 +15,9 @@ use std::str::from_utf8; use thiserror::Error; use unicode_width::UnicodeWidthChar; use uucore::display::Quotable; -use uucore::error::{FromIo, UError, UResult, set_exit_code}; +use uucore::error::{FromIo, UError, UResult, USimpleError, set_exit_code}; use uucore::translate; -use uucore::{format_usage, show_error}; +use uucore::{format_usage, show}; pub mod options { pub static TABS: &str = "tabs"; @@ -174,7 +174,7 @@ struct Options { tabstops: Vec, tspaces: String, iflag: bool, - uflag: bool, + utf8: bool, /// Strategy for expanding tabs for columns beyond those specified /// in `tabstops`. @@ -189,7 +189,7 @@ impl Options { }; let iflag = matches.get_flag(options::INITIAL); - let uflag = !matches.get_flag(options::NO_UTF8); + let utf8 = !matches.get_flag(options::NO_UTF8); // avoid allocations when dumping out long sequences of spaces // by precomputing the longest string of spaces we will ever need @@ -214,7 +214,7 @@ impl Options { tabstops, tspaces, iflag, - uflag, + utf8, remaining_mode, }) } @@ -296,6 +296,12 @@ fn open(path: &OsString) -> UResult>> { Ok(BufReader::new(Box::new(stdin()) as Box)) } else { let path_ref = Path::new(path); + if path_ref.is_dir() { + return Err(USimpleError::new( + 1, + translate!("expand-error-is-directory", "file" => path.maybe_quote()), + )); + } file_buf = File::open(path_ref).map_err_context(|| path.maybe_quote().to_string())?; Ok(BufReader::new(Box::new(file_buf) as Box)) } @@ -349,7 +355,62 @@ enum CharType { Other, } -#[allow(clippy::cognitive_complexity)] +/// Classify a character and determine its width and byte length. +/// +/// Returns `(CharType, display_width, byte_length)`. +#[inline] +fn classify_char(buf: &[u8], byte: usize, utf8: bool) -> (CharType, usize, usize) { + use self::CharType::{Backspace, Other, Tab}; + + if utf8 { + let nbytes = char::from(buf[byte]).len_utf8(); + + if byte + nbytes > buf.len() { + // don't overrun buffer because of invalid UTF-8 + return (Other, 1, 1); + } + + if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) { + match t.chars().next() { + Some('\t') => (Tab, 0, 1), + Some('\x08') => (Backspace, 0, 1), + Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes), + None => { + // no valid char at start of t, so take 1 byte + (Other, 1, 1) + } + } + } else { + (Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide + } + } else { + ( + match buf.get(byte) { + // always take exactly 1 byte in strict ASCII mode + Some(0x09) => Tab, + Some(0x08) => Backspace, + _ => Other, + }, + 0, + 1, + ) + } +} + +/// Write spaces for a tab expansion. +#[inline] +fn write_tab_spaces( + output: &mut BufWriter, + nts: usize, + tspaces: &str, +) -> std::io::Result<()> { + if nts <= tspaces.len() { + output.write_all(&tspaces.as_bytes()[..nts]) + } else { + output.write_all(" ".repeat(nts).as_bytes()) + } +} + fn expand_line( buf: &mut Vec, output: &mut BufWriter, @@ -360,8 +421,7 @@ fn expand_line( // Fast path: if there are no tabs, backspaces, and (in UTF-8 mode or no carriage returns), // we can write the buffer directly without character-by-character processing - if !buf.contains(&b'\t') && !buf.contains(&b'\x08') && (options.uflag || !buf.contains(&b'\r')) - { + if !buf.contains(&b'\t') && !buf.contains(&b'\x08') && (options.utf8 || !buf.contains(&b'\r')) { output.write_all(buf)?; buf.truncate(0); return Ok(()); @@ -372,37 +432,7 @@ fn expand_line( let mut init = true; while byte < buf.len() { - let (ctype, cwidth, nbytes) = if options.uflag { - let nbytes = char::from(buf[byte]).len_utf8(); - - if byte + nbytes > buf.len() { - // don't overrun buffer because of invalid UTF-8 - (Other, 1, 1) - } else if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) { - match t.chars().next() { - Some('\t') => (Tab, 0, nbytes), - Some('\x08') => (Backspace, 0, nbytes), - Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes), - None => { - // no valid char at start of t, so take 1 byte - (Other, 1, 1) - } - } - } else { - (Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide - } - } else { - ( - match buf.get(byte) { - // always take exactly 1 byte in strict ASCII mode - Some(0x09) => Tab, - Some(0x08) => Backspace, - _ => Other, - }, - 1, - 1, - ) - }; + let (ctype, cwidth, nbytes) = classify_char(buf, byte, options.utf8); // figure out how many columns this char takes up match ctype { @@ -413,23 +443,24 @@ fn expand_line( // now dump out either spaces if we're expanding, or a literal tab if we're not if init || !options.iflag { - if nts <= options.tspaces.len() { - output.write_all(&options.tspaces.as_bytes()[..nts])?; - } else { - output.write_all(" ".repeat(nts).as_bytes())?; - } + write_tab_spaces(output, nts, &options.tspaces)?; } else { output.write_all(&buf[byte..byte + nbytes])?; } } - _ => { - col = if ctype == Other { - col + cwidth - } else if col > 0 { - col - 1 - } else { - 0 - }; + Backspace => { + col = col.saturating_sub(1); + + // if we're writing anything other than a space, then we're + // done with the line's leading spaces + if buf[byte] != 0x20 { + init = false; + } + + output.write_all(&buf[byte..byte + nbytes])?; + } + Other => { + col += cwidth; // if we're writing anything other than a space, then we're // done with the line's leading spaces @@ -449,34 +480,34 @@ fn expand_line( Ok(()) } +fn expand_file( + file: &OsString, + output: &mut BufWriter, + options: &Options, +) -> UResult<()> { + let mut buf = Vec::new(); + let mut input = open(file)?; + let ts = options.tabstops.as_ref(); + loop { + match input.read_until(b'\n', &mut buf) { + Ok(0) => break, + Ok(_) => { + expand_line(&mut buf, output, ts, options) + .map_err_context(|| translate!("expand-error-failed-to-write-output"))?; + } + Err(e) => return Err(e.map_err_context(|| file.maybe_quote().to_string())), + } + } + Ok(()) +} + fn expand(options: &Options) -> UResult<()> { let mut output = BufWriter::new(stdout()); - let ts = options.tabstops.as_ref(); - let mut buf = Vec::new(); for file in &options.files { - if Path::new(file).is_dir() { - show_error!( - "{}", - translate!("expand-error-is-directory", "file" => file.maybe_quote()) - ); + if let Err(e) = expand_file(file, &mut output, options) { + show!(e); set_exit_code(1); - continue; - } - match open(file) { - Ok(mut fh) => { - while match fh.read_until(b'\n', &mut buf) { - Ok(s) => s > 0, - Err(_) => buf.is_empty(), - } { - expand_line(&mut buf, &mut output, ts, options) - .map_err_context(|| translate!("expand-error-failed-to-write-output"))?; - } - } - Err(e) => { - show_error!("{e}"); - set_exit_code(1); - } } } // Flush once at the end diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index ef672bf9308..15d09f7a01b 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -31,7 +31,6 @@ path = "src/main.rs" [dev-dependencies] divan = { workspace = true } -rand = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } [lib] diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs index 2eb97933180..d79d6d42233 100644 --- a/src/uu/fold/src/fold.rs +++ b/src/uu/fold/src/fold.rs @@ -19,6 +19,10 @@ const TAB_WIDTH: usize = 8; const NL: u8 = b'\n'; const CR: u8 = b'\r'; const TAB: u8 = b'\t'; +// Implementation threshold (8 KiB) to prevent unbounded buffer growth during streaming. +// Chosen as a small, fixed cap: large enough to avoid excessive flushes, but +// small enough to keep memory bounded when the input has no fold points. +const STREAMING_FLUSH_THRESHOLD: usize = 8 * 1024; mod options { pub const BYTES: &str = "bytes"; @@ -288,6 +292,10 @@ fn compute_col_count(buffer: &[u8], mode: WidthMode) -> usize { } fn emit_output(ctx: &mut FoldContext<'_, W>) -> UResult<()> { + // Emit one folded line: + // - with `-s`, cut at the last remembered whitespace when possible + // - otherwise, cut at the current buffer end + // The remainder (if any) stays in the buffer for the next line. let consume = match *ctx.last_space { Some(index) => index + 1, None => ctx.output.len(), @@ -309,6 +317,7 @@ fn emit_output(ctx: &mut FoldContext<'_, W>) -> UResult<()> { *ctx.col_count = compute_col_count(ctx.output, ctx.mode); if ctx.spaces { + // Rebase the remembered whitespace position into the remaining buffer. *ctx.last_space = last_space.and_then(|idx| { if idx < consume { None @@ -322,6 +331,36 @@ fn emit_output(ctx: &mut FoldContext<'_, W>) -> UResult<()> { Ok(()) } +fn maybe_flush_unbroken_output(ctx: &mut FoldContext<'_, W>) -> UResult<()> { + // In streaming mode without `-s`, avoid unbounded buffering by periodically + // flushing long unbroken segments. With `-s` we must keep the buffer so we + // can still break at the last whitespace boundary. + if ctx.spaces || ctx.output.len() < STREAMING_FLUSH_THRESHOLD { + return Ok(()); + } + + // Write raw bytes without inserting a newline; folding will continue + // based on updated column tracking in the caller. + ctx.writer.write_all(ctx.output)?; + ctx.output.clear(); + Ok(()) +} + +fn push_byte(ctx: &mut FoldContext<'_, W>, byte: u8) -> UResult<()> { + // Append a single byte to the buffer. + ctx.output.push(byte); + maybe_flush_unbroken_output(ctx) +} + +fn push_bytes(ctx: &mut FoldContext<'_, W>, bytes: &[u8]) -> UResult<()> { + // Append a byte slice to the buffer and flush if it grows too large. + if bytes.is_empty() { + return Ok(()); + } + ctx.output.extend_from_slice(bytes); + maybe_flush_unbroken_output(ctx) +} + fn process_ascii_line(line: &[u8], ctx: &mut FoldContext<'_, W>) -> UResult<()> { let mut idx = 0; let len = line.len(); @@ -331,15 +370,15 @@ fn process_ascii_line(line: &[u8], ctx: &mut FoldContext<'_, W>) -> UR NL => { *ctx.last_space = None; emit_output(ctx)?; - break; + idx += 1; } CR => { - ctx.output.push(CR); + push_byte(ctx, CR)?; *ctx.col_count = 0; idx += 1; } 0x08 => { - ctx.output.push(0x08); + push_byte(ctx, 0x08)?; *ctx.col_count = ctx.col_count.saturating_sub(1); idx += 1; } @@ -358,16 +397,23 @@ fn process_ascii_line(line: &[u8], ctx: &mut FoldContext<'_, W>) -> UR } else { *ctx.last_space = None; } - ctx.output.push(TAB); + push_byte(ctx, TAB)?; idx += 1; } 0x00..=0x07 | 0x0B..=0x0C | 0x0E..=0x1F | 0x7F => { - ctx.output.push(line[idx]); + push_byte(ctx, line[idx])?; if ctx.spaces && line[idx].is_ascii_whitespace() && line[idx] != CR { *ctx.last_space = Some(ctx.output.len() - 1); } else if !ctx.spaces { *ctx.last_space = None; } + + if ctx.mode == WidthMode::Characters { + *ctx.col_count = ctx.col_count.saturating_add(1); + if *ctx.col_count >= ctx.width { + emit_output(ctx)?; + } + } idx += 1; } _ => { @@ -405,7 +451,7 @@ fn push_ascii_segment(segment: &[u8], ctx: &mut FoldContext<'_, W>) -> let take = remaining.len().min(available); let base_len = ctx.output.len(); - ctx.output.extend_from_slice(&remaining[..take]); + push_bytes(ctx, &remaining[..take])?; *ctx.col_count += take; if ctx.spaces { @@ -430,16 +476,26 @@ fn process_utf8_line(line: &str, ctx: &mut FoldContext<'_, W>) -> URes return process_ascii_line(line.as_bytes(), ctx); } + process_utf8_chars(line, ctx) +} + +fn process_utf8_chars(line: &str, ctx: &mut FoldContext<'_, W>) -> UResult<()> { let line_bytes = line.as_bytes(); let mut iter = line.char_indices().peekable(); while let Some((byte_idx, ch)) = iter.next() { - // Include combining characters with the base character - while let Some(&(_, next_ch)) = iter.peek() { - if unicode_width::UnicodeWidthChar::width(next_ch).unwrap_or(1) == 0 { - iter.next(); - } else { - break; + // Include combining characters with the base character when we are + // measuring by display columns. In character-counting mode every + // scalar value must advance the counter to match `chars().count()` + // semantics (see `fold_characters_reference` in the tests), so we do + // not coalesce zero-width scalars there. + if ctx.mode == WidthMode::Columns { + while let Some(&(_, next_ch)) = iter.peek() { + if unicode_width::UnicodeWidthChar::width(next_ch).unwrap_or(1) == 0 { + iter.next(); + } else { + break; + } } } @@ -448,7 +504,7 @@ fn process_utf8_line(line: &str, ctx: &mut FoldContext<'_, W>) -> URes if ch == '\n' { *ctx.last_space = None; emit_output(ctx)?; - break; + continue; } if *ctx.col_count >= ctx.width { @@ -456,15 +512,13 @@ fn process_utf8_line(line: &str, ctx: &mut FoldContext<'_, W>) -> URes } if ch == '\r' { - ctx.output - .extend_from_slice(&line_bytes[byte_idx..next_idx]); + push_bytes(ctx, &line_bytes[byte_idx..next_idx])?; *ctx.col_count = 0; continue; } if ch == '\x08' { - ctx.output - .extend_from_slice(&line_bytes[byte_idx..next_idx]); + push_bytes(ctx, &line_bytes[byte_idx..next_idx])?; *ctx.col_count = ctx.col_count.saturating_sub(1); continue; } @@ -484,8 +538,7 @@ fn process_utf8_line(line: &str, ctx: &mut FoldContext<'_, W>) -> URes } else { *ctx.last_space = None; } - ctx.output - .extend_from_slice(&line_bytes[byte_idx..next_idx]); + push_bytes(ctx, &line_bytes[byte_idx..next_idx])?; continue; } @@ -506,8 +559,7 @@ fn process_utf8_line(line: &str, ctx: &mut FoldContext<'_, W>) -> URes *ctx.last_space = Some(ctx.output.len()); } - ctx.output - .extend_from_slice(&line_bytes[byte_idx..next_idx]); + push_bytes(ctx, &line_bytes[byte_idx..next_idx])?; *ctx.col_count = ctx.col_count.saturating_add(added); } @@ -519,7 +571,7 @@ fn process_non_utf8_line(line: &[u8], ctx: &mut FoldContext<'_, W>) -> if byte == NL { *ctx.last_space = None; emit_output(ctx)?; - break; + continue; } if *ctx.col_count >= ctx.width { @@ -539,7 +591,7 @@ fn process_non_utf8_line(line: &[u8], ctx: &mut FoldContext<'_, W>) -> } else { None }; - ctx.output.push(byte); + push_byte(ctx, byte)?; continue; } 0x08 => *ctx.col_count = ctx.col_count.saturating_sub(1), @@ -550,7 +602,46 @@ fn process_non_utf8_line(line: &[u8], ctx: &mut FoldContext<'_, W>) -> _ => *ctx.col_count = ctx.col_count.saturating_add(1), } - ctx.output.push(byte); + push_byte(ctx, byte)?; + } + + Ok(()) +} + +/// Process buffered bytes, emitting output for valid UTF-8 prefixes and +/// deferring incomplete sequences until more input arrives. +/// +/// If the buffer contains invalid UTF-8, it is handled in non-UTF-8 mode and +/// the buffer is fully consumed. +fn process_pending_chunk( + pending: &mut Vec, + ctx: &mut FoldContext<'_, W>, +) -> UResult<()> { + while !pending.is_empty() { + match std::str::from_utf8(pending) { + Ok(valid) => { + process_utf8_line(valid, ctx)?; + pending.clear(); + break; + } + Err(err) => { + if err.error_len().is_some() { + let res = process_non_utf8_line(pending, ctx); + pending.clear(); + res?; + break; + } + + let valid_up_to = err.valid_up_to(); + if valid_up_to == 0 { + break; + } + + let valid = std::str::from_utf8(&pending[..valid_up_to]).expect("valid prefix"); + process_utf8_line(valid, ctx)?; + pending.drain(..valid_up_to); + } + } } Ok(()) @@ -572,20 +663,12 @@ fn fold_file( mode: WidthMode, writer: &mut W, ) -> UResult<()> { - let mut line = Vec::new(); let mut output = Vec::new(); let mut col_count = 0; let mut last_space = None; + let mut pending = Vec::with_capacity(8 * 1024); - loop { - if file - .read_until(NL, &mut line) - .map_err_context(|| translate!("fold-error-readline"))? - == 0 - { - break; - } - + { let mut ctx = FoldContext { spaces, width, @@ -596,17 +679,32 @@ fn fold_file( last_space: &mut last_space, }; - match std::str::from_utf8(&line) { - Ok(s) => process_utf8_line(s, &mut ctx)?, - Err(_) => process_non_utf8_line(&line, &mut ctx)?, + loop { + let buffer = file + .fill_buf() + .map_err_context(|| translate!("fold-error-readline"))?; + if buffer.is_empty() { + break; + } + pending.extend_from_slice(buffer); + let consumed = buffer.len(); + file.consume(consumed); + + process_pending_chunk(&mut pending, &mut ctx)?; } - line.clear(); - } + if !pending.is_empty() { + match std::str::from_utf8(&pending) { + Ok(s) => process_utf8_line(s, &mut ctx)?, + Err(_) => process_non_utf8_line(&pending, &mut ctx)?, + } + pending.clear(); + } - if !output.is_empty() { - writer.write_all(&output)?; - output.clear(); + if !ctx.output.is_empty() { + ctx.writer.write_all(ctx.output)?; + ctx.output.clear(); + } } Ok(()) diff --git a/src/uu/groups/src/groups.rs b/src/uu/groups/src/groups.rs index 772e23cf5f0..d6ecc9ec4ac 100644 --- a/src/uu/groups/src/groups.rs +++ b/src/uu/groups/src/groups.rs @@ -5,6 +5,7 @@ // spell-checker:ignore (ToDO) passwd +use std::io::{Write, stdout}; use thiserror::Error; use uucore::{ display::Quotable, @@ -59,7 +60,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { return Err(GroupsError::GetGroupsFailed.into()); }; let groups: Vec = gids.iter().map(infallible_gid2grp).collect(); - println!("{}", groups.join(" ")); + writeln!(stdout(), "{}", groups.join(" "))?; return Ok(()); } @@ -67,7 +68,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { match Passwd::locate(user.as_str()) { Ok(p) => { let groups: Vec = p.belongs_to().iter().map(infallible_gid2grp).collect(); - println!("{user} : {}", groups.join(" ")); + writeln!(stdout(), "{user} : {}", groups.join(" "))?; } Err(_) => { // The `show!()` macro sets the global exit code for the program. diff --git a/src/uu/hashsum/Cargo.toml b/src/uu/hashsum/Cargo.toml index f77c2c52d84..4c28a1588b6 100644 --- a/src/uu/hashsum/Cargo.toml +++ b/src/uu/hashsum/Cargo.toml @@ -27,6 +27,4 @@ name = "hashsum" path = "src/main.rs" [dev-dependencies] -divan = { workspace = true } -tempfile = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 1bad3635558..6c401041ab4 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -13,7 +13,7 @@ use clap::builder::ValueParser; use clap::{Arg, ArgAction, ArgMatches, Command}; use uucore::checksum::compute::{ - ChecksumComputeOptions, figure_out_output_format, perform_checksum_computation, + ChecksumComputeOptions, OutputFormat, perform_checksum_computation, }; use uucore::checksum::validate::{ ChecksumValidateOptions, ChecksumVerbose, perform_checksum_validation, @@ -121,9 +121,6 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { let args = iter::once(program.clone()).chain(args); - // Default binary in Windows, text mode otherwise - let binary_flag_default = cfg!(windows); - let (command, is_hashsum_bin) = uu_app(&binary_name); // FIXME: this should use try_get_matches_from() and crash!(), but at the moment that just @@ -148,13 +145,6 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { (AlgoKind::from_bin_name(&binary_name)?, length) }; - let binary = if matches.get_flag("binary") { - true - } else if matches.get_flag("text") { - false - } else { - binary_flag_default - }; let check = matches.get_flag("check"); let check_flag = |flag| match (check, matches.get_flag(flag)) { @@ -204,16 +194,11 @@ pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { let algo = SizedAlgoKind::from_unsized(algo_kind, length)?; let line_ending = LineEnding::from_zero_flag(matches.get_flag("zero")); + let output_format = OutputFormat::from_standalone(std::env::args_os())?; let opts = ChecksumComputeOptions { algo_kind: algo, - output_format: figure_out_output_format( - algo, - matches.get_flag(options::TAG), - binary, - /* raw */ false, - /* base64: */ false, - ), + output_format, line_ending, }; diff --git a/src/uu/install/src/install.rs b/src/uu/install/src/install.rs index e128470fcc8..7dde478b4c3 100644 --- a/src/uu/install/src/install.rs +++ b/src/uu/install/src/install.rs @@ -10,7 +10,7 @@ mod mode; use clap::{Arg, ArgAction, ArgMatches, Command}; use file_diff::diff; use filetime::{FileTime, set_file_times}; -#[cfg(all(feature = "selinux", target_os = "linux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] use selinux::SecurityContext; use std::ffi::OsString; use std::fmt::Debug; @@ -27,7 +27,7 @@ use uucore::error::{FromIo, UError, UResult, UUsageError}; use uucore::fs::dir_strip_dot_for_creation; use uucore::perms::{Verbosity, VerbosityLevel, wrap_chown}; use uucore::process::{getegid, geteuid}; -#[cfg(all(feature = "selinux", target_os = "linux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] use uucore::selinux::{ SeLinuxError, contexts_differ, get_selinux_security_context, is_selinux_enabled, selinux_error_description, set_selinux_security_context, @@ -118,7 +118,7 @@ enum InstallError { #[error("{}", translate!("install-error-extra-operand", "operand" => .0.quote(), "usage" => .1.clone()))] ExtraOperand(OsString, String), - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] #[error("{}", .0)] SelinuxContextFailed(String), } @@ -1004,7 +1004,7 @@ fn copy(from: &Path, to: &Path, b: &Behavior) -> UResult<()> { Ok(()) } -#[cfg(all(feature = "selinux", target_os = "linux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] fn get_context_for_selinux(b: &Behavior) -> Option<&String> { if b.default_context { None @@ -1139,7 +1139,7 @@ fn need_copy(from: &Path, to: &Path, b: &Behavior) -> bool { false } -#[cfg(all(feature = "selinux", target_os = "linux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] /// Sets the `SELinux` security context for install's -Z flag behavior. /// /// This function implements the specific behavior needed for install's -Z flag, @@ -1173,7 +1173,7 @@ pub fn set_selinux_default_context(path: &Path) -> Result<(), SeLinuxError> { } } -#[cfg(all(feature = "selinux", target_os = "linux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] /// Gets the default `SELinux` context for a path based on the system's security policy. /// /// This function attempts to determine what the "correct" `SELinux` context should be @@ -1229,7 +1229,7 @@ fn get_default_context_for_path(path: &Path) -> Result, SeLinuxEr Ok(None) } -#[cfg(all(feature = "selinux", target_os = "linux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] /// Derives an appropriate `SELinux` context based on a parent directory context. /// /// This is a heuristic function that attempts to generate an appropriate @@ -1267,7 +1267,7 @@ fn derive_context_from_parent(parent_context: &str) -> String { } } -#[cfg(all(feature = "selinux", target_os = "linux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] /// Helper function to collect paths that need `SELinux` context setting. /// /// Traverses from the given starting path up to existing parent directories. @@ -1281,7 +1281,7 @@ fn collect_paths_for_context_setting(starting_path: &Path) -> Vec<&Path> { paths } -#[cfg(all(feature = "selinux", target_os = "linux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] /// Sets the `SELinux` security context for a directory hierarchy. /// /// This function traverses from the given starting path up to existing parent directories @@ -1321,7 +1321,7 @@ fn set_selinux_context_for_directories(target_path: &Path, context: Option<&Stri } } -#[cfg(all(feature = "selinux", target_os = "linux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] /// Sets `SELinux` context for created directories using install's -Z default behavior. /// /// Similar to `set_selinux_context_for_directories` but uses install's @@ -1345,10 +1345,10 @@ pub fn set_selinux_context_for_directories_install(target_path: &Path, context: #[cfg(test)] mod tests { - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] use super::derive_context_from_parent; - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] #[test] fn test_derive_context_from_parent() { // Test cases: (input_context, file_type, expected_output, description) diff --git a/src/uu/kill/Cargo.toml b/src/uu/kill/Cargo.toml index 1813084af06..10de8379d35 100644 --- a/src/uu/kill/Cargo.toml +++ b/src/uu/kill/Cargo.toml @@ -19,10 +19,12 @@ path = "src/kill.rs" [dependencies] clap = { workspace = true } -nix = { workspace = true, features = ["signal"] } uucore = { workspace = true, features = ["signals"] } fluent = { workspace = true } +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = ["signal"] } + [[bin]] name = "kill" path = "src/main.rs" diff --git a/src/uu/logname/src/logname.rs b/src/uu/logname/src/logname.rs index 3dd99549500..6684bd9f446 100644 --- a/src/uu/logname/src/logname.rs +++ b/src/uu/logname/src/logname.rs @@ -7,6 +7,7 @@ use clap::Command; use std::ffi::CStr; +use std::io::{Write, stdout}; use uucore::translate; use uucore::{error::UResult, show_error}; @@ -26,7 +27,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let _ = uucore::clap_localization::handle_clap_result(uu_app(), args)?; match get_userlogin() { - Some(userlogin) => println!("{userlogin}"), + Some(userlogin) => writeln!(stdout(), "{userlogin}")?, None => show_error!("{}", translate!("logname-error-no-login-name")), } diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 6694d7bcadd..1bad3002335 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -373,7 +373,7 @@ pub struct Config { time_format_recent: String, // Time format for recent dates time_format_older: Option, // Time format for older dates (optional, if not present, time_format_recent is used) context: bool, - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] selinux_supported: bool, #[cfg(all(feature = "smack", target_os = "linux"))] smack_supported: bool, @@ -1233,7 +1233,7 @@ impl Config { time_format_recent, time_format_older, context, - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] selinux_supported: uucore::selinux::is_selinux_enabled(), #[cfg(all(feature = "smack", target_os = "linux"))] smack_supported: uucore::smack::is_smack_enabled(), @@ -3531,7 +3531,7 @@ fn get_security_context<'a>( } } - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] if config.selinux_supported { match selinux::SecurityContext::of_path(path, must_dereference, false) { Err(_r) => { diff --git a/src/uu/md5sum/Cargo.toml b/src/uu/md5sum/Cargo.toml new file mode 100644 index 00000000000..70ecfe0cdc3 --- /dev/null +++ b/src/uu/md5sum/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "uu_md5sum" +description = "md5sum ~ (uutils) Print or check the MD5 checksums" +repository = "https://github.com/uutils/coreutils/tree/main/src/uu/md5sum" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[lib] +path = "src/md5sum.rs" + +[dependencies] +clap = { workspace = true } +uu_checksum_common = { workspace = true } +uucore = { workspace = true, features = [ + "checksum", + "encoding", + "sum", + "hardware", +] } +fluent = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +tempfile = { workspace = true } +uucore = { workspace = true, features = ["benchmark"] } + +[[bin]] +name = "md5sum" +path = "src/main.rs" + +# [[bench]] +# name = "b2sum_bench" +# harness = false diff --git a/src/uu/md5sum/LICENSE b/src/uu/md5sum/LICENSE new file mode 120000 index 00000000000..5853aaea53b --- /dev/null +++ b/src/uu/md5sum/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/md5sum/locales/en-US.ftl b/src/uu/md5sum/locales/en-US.ftl new file mode 100644 index 00000000000..9712ff7c66d --- /dev/null +++ b/src/uu/md5sum/locales/en-US.ftl @@ -0,0 +1,2 @@ +md5sum-about = Print or check the MD5 checksums +md5sum-usage = md5sum [OPTIONS] [FILE]... diff --git a/src/uu/md5sum/locales/fr-FR.ftl b/src/uu/md5sum/locales/fr-FR.ftl new file mode 100644 index 00000000000..8da43df3665 --- /dev/null +++ b/src/uu/md5sum/locales/fr-FR.ftl @@ -0,0 +1,2 @@ +md5sum-about = Afficher le MD5 et la taille de chaque fichier +md5sum-usage = md5sum [OPTION]... [FICHIER]... diff --git a/src/uu/md5sum/src/main.rs b/src/uu/md5sum/src/main.rs new file mode 100644 index 00000000000..d5509656f93 --- /dev/null +++ b/src/uu/md5sum/src/main.rs @@ -0,0 +1 @@ +uucore::bin!(uu_md5sum); diff --git a/src/uu/md5sum/src/md5sum.rs b/src/uu/md5sum/src/md5sum.rs new file mode 100644 index 00000000000..c9366eb4ba9 --- /dev/null +++ b/src/uu/md5sum/src/md5sum.rs @@ -0,0 +1 @@ +uu_checksum_common::declare_standalone!("md5sum", uucore::checksum::AlgoKind::Md5); diff --git a/src/uu/mkfifo/Cargo.toml b/src/uu/mkfifo/Cargo.toml index 9006143441b..ece4838101c 100644 --- a/src/uu/mkfifo/Cargo.toml +++ b/src/uu/mkfifo/Cargo.toml @@ -19,11 +19,12 @@ path = "src/mkfifo.rs" [dependencies] clap = { workspace = true } -libc = { workspace = true } -nix = { workspace = true, features = ["fs"] } uucore = { workspace = true, features = ["fs", "mode"] } fluent = { workspace = true } +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = ["fs"] } + [features] selinux = ["uucore/selinux"] smack = ["uucore/smack"] diff --git a/src/uu/mkfifo/src/mkfifo.rs b/src/uu/mkfifo/src/mkfifo.rs index 82c5ec1c6da..740e8cdb475 100644 --- a/src/uu/mkfifo/src/mkfifo.rs +++ b/src/uu/mkfifo/src/mkfifo.rs @@ -65,7 +65,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } // Apply SELinux context if requested - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] { // Extract the SELinux related flags and options let set_security_context = matches.get_flag(options::SECURITY_CONTEXT); diff --git a/src/uu/more/Cargo.toml b/src/uu/more/Cargo.toml index cd65f7412fc..bee3ff755f8 100644 --- a/src/uu/more/Cargo.toml +++ b/src/uu/more/Cargo.toml @@ -19,12 +19,14 @@ path = "src/more.rs" [dependencies] clap = { workspace = true } +crossterm = { workspace = true, features = ["events"] } uucore = { workspace = true } -crossterm = { workspace = true } fluent = { workspace = true } [target.'cfg(all(unix, not(target_os = "fuchsia")))'.dependencies] -nix = { workspace = true } + +[target.'cfg(windows)'.dependencies] +crossterm = { workspace = true, features = ["windows"] } [target.'cfg(target_os = "macos")'.dependencies] crossterm = { workspace = true, features = ["use-dev-tty"] } diff --git a/src/uu/nice/Cargo.toml b/src/uu/nice/Cargo.toml index f58c7f3d7cf..00f96718eb5 100644 --- a/src/uu/nice/Cargo.toml +++ b/src/uu/nice/Cargo.toml @@ -20,10 +20,12 @@ path = "src/nice.rs" [dependencies] clap = { workspace = true } libc = { workspace = true } -nix = { workspace = true } uucore = { workspace = true } fluent = { workspace = true } +[target.'cfg(unix)'.dependencies] +nix = { workspace = true } + [[bin]] name = "nice" path = "src/main.rs" diff --git a/src/uu/numfmt/Cargo.toml b/src/uu/numfmt/Cargo.toml index 177f2e3b8ac..fed39ad68f3 100644 --- a/src/uu/numfmt/Cargo.toml +++ b/src/uu/numfmt/Cargo.toml @@ -25,7 +25,6 @@ fluent = { workspace = true } [dev-dependencies] divan = { workspace = true } -tempfile = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } [[bin]] diff --git a/src/uu/pr/src/pr.rs b/src/uu/pr/src/pr.rs index b37f3a88330..e5faac408af 100644 --- a/src/uu/pr/src/pr.rs +++ b/src/uu/pr/src/pr.rs @@ -989,7 +989,7 @@ fn print_page( lines: &[FileLine], options: &OutputOptions, page: usize, -) -> Result { +) -> Result<(), std::io::Error> { let line_separator = options.line_separator.as_bytes(); let page_separator = options.page_separator_char.as_bytes(); @@ -1004,7 +1004,7 @@ fn print_page( out.write_all(line_separator)?; } - let lines_written = write_columns(lines, options, &mut out)?; + write_columns(lines, options, &mut out)?; for (index, x) in trailer_content.iter().enumerate() { out.write_all(x.as_bytes())?; @@ -1014,7 +1014,7 @@ fn print_page( } out.write_all(page_separator)?; out.flush()?; - Ok(lines_written) + Ok(()) } #[allow(clippy::cognitive_complexity)] @@ -1022,7 +1022,7 @@ fn write_columns( lines: &[FileLine], options: &OutputOptions, out: &mut impl Write, -) -> Result { +) -> Result<(), std::io::Error> { let line_separator = options.content_line_separator.as_bytes(); let content_lines_per_page = if options.double_space { @@ -1035,7 +1035,6 @@ fn write_columns( .merge_files_print .unwrap_or_else(|| get_columns(options)); let line_width = options.line_width; - let mut lines_printed = 0; let feed_line_present = options.form_feed_used; let mut not_found_break = false; @@ -1101,7 +1100,6 @@ fn write_columns( get_line_for_printing(options, file_line, columns, i, line_width, indexes) .as_bytes(), )?; - lines_printed += 1; } } if not_found_break && feed_line_present { @@ -1110,7 +1108,7 @@ fn write_columns( out.write_all(line_separator)?; } - Ok(lines_printed) + Ok(()) } fn get_line_for_printing( diff --git a/src/uu/readlink/src/readlink.rs b/src/uu/readlink/src/readlink.rs index cdc1d97b02e..f220d6a0515 100644 --- a/src/uu/readlink/src/readlink.rs +++ b/src/uu/readlink/src/readlink.rs @@ -185,7 +185,7 @@ pub fn uu_app() -> Command { fn show(path: &Path, line_ending: Option) -> std::io::Result<()> { uucore::display::print_verbatim(path)?; if let Some(line_ending) = line_ending { - print!("{line_ending}"); + write!(stdout(), "{line_ending}")?; } stdout().flush() } diff --git a/src/uu/rm/src/rm.rs b/src/uu/rm/src/rm.rs index 252c723406b..a4fb32bcb45 100644 --- a/src/uu/rm/src/rm.rs +++ b/src/uu/rm/src/rm.rs @@ -838,7 +838,9 @@ fn path_is_current_or_parent_directory(path: &Path) -> bool { let dir_separator = MAIN_SEPARATOR as u8; if let Ok(path_bytes) = path_str { return path_bytes == ([b'.']) + || path_bytes == ([b'.', dir_separator]) || path_bytes == ([b'.', b'.']) + || path_bytes == ([b'.', b'.', dir_separator]) || path_bytes.ends_with(&[dir_separator, b'.']) || path_bytes.ends_with(&[dir_separator, b'.', b'.']) || path_bytes.ends_with(&[dir_separator, b'.', dir_separator]) diff --git a/src/uu/runcon/Cargo.toml b/src/uu/runcon/Cargo.toml index f358c31ec05..fdb2f51746d 100644 --- a/src/uu/runcon/Cargo.toml +++ b/src/uu/runcon/Cargo.toml @@ -17,7 +17,8 @@ workspace = true [lib] path = "src/runcon.rs" -[target.'cfg(target_os = "linux")'.dependencies] # todo: block fetching crates without feat_selinux +# TODO: block fetching crates without feat_selinux +[target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies] clap = { workspace = true } uucore = { workspace = true, features = ["entries", "fs", "perms", "selinux"] } selinux = { workspace = true } diff --git a/src/uu/runcon/src/errors.rs b/src/uu/runcon/src/errors.rs index 4fa3135ca4a..49dc83d16c2 100644 --- a/src/uu/runcon/src/errors.rs +++ b/src/uu/runcon/src/errors.rs @@ -2,7 +2,8 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -#![cfg(target_os = "linux")] + +#![cfg(any(target_os = "linux", target_os = "android"))] use std::ffi::OsString; use std::fmt::{Display, Formatter, Write}; diff --git a/src/uu/runcon/src/main.rs b/src/uu/runcon/src/main.rs index dde0f239467..947934af1cb 100644 --- a/src/uu/runcon/src/main.rs +++ b/src/uu/runcon/src/main.rs @@ -1,11 +1,18 @@ -// On non-Linux targets, provide a stub main to keep the binary target present -// and the workspace buildable. Using item-level cfg avoids excluding the crate -// entirely (via #![cfg(...)]), which can break tooling and cross builds that -// expect this binary to exist even when it's a no-op off Linux. -#[cfg(target_os = "linux")] +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! This package is specific to Android and some Linux distributions. On other +//! targets, provide a stub main to keep the binary target present and the +//! workspace buildable. Using item-level cfg avoids excluding the crate +//! entirely (via #![cfg(...)]), which can break tooling and cross builds that +//! expect this binary to exist even when it's a no-op off Linux. + +#[cfg(any(target_os = "linux", target_os = "android"))] uucore::bin!(uu_runcon); -#[cfg(not(target_os = "linux"))] +#[cfg(not(any(target_os = "linux", target_os = "android")))] fn main() { eprintln!("runcon: SELinux is not supported on this platform"); std::process::exit(1); diff --git a/src/uu/runcon/src/runcon.rs b/src/uu/runcon/src/runcon.rs index 60c71d1dca3..128d0dce370 100644 --- a/src/uu/runcon/src/runcon.rs +++ b/src/uu/runcon/src/runcon.rs @@ -2,8 +2,10 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. + // spell-checker:ignore (vars) RFILE execv execvp -#![cfg(target_os = "linux")] + +#![cfg(any(target_os = "linux", target_os = "android"))] use clap::builder::ValueParser; use uucore::error::{UError, UResult}; diff --git a/src/uu/seq/Cargo.toml b/src/uu/seq/Cargo.toml index 534b675e126..cdc1c29af88 100644 --- a/src/uu/seq/Cargo.toml +++ b/src/uu/seq/Cargo.toml @@ -40,7 +40,6 @@ path = "src/main.rs" [dev-dependencies] divan = { workspace = true } -tempfile = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } [[bench]] diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index b931cc8b11d..29373a511f8 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -92,22 +92,8 @@ fn select_precision( } } -// Initialize SIGPIPE state capture at process startup (Unix only) -#[cfg(unix)] -uucore::init_startup_state_capture!(); - #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - // Restore SIGPIPE to default if it wasn't explicitly ignored by parent. - // The Rust runtime ignores SIGPIPE, but we need to respect the parent's - // signal disposition for proper pipeline behavior (GNU compatibility). - #[cfg(unix)] - if !signals::sigpipe_was_ignored() { - // Ignore the return value: if setting signal handler fails, we continue anyway. - // The worst case is we don't get proper SIGPIPE behavior, but seq will still work. - let _ = signals::enable_pipe_errors(); - } - let matches = uucore::clap_localization::handle_clap_result(uu_app(), split_short_args_with_value(args))?; diff --git a/src/uu/sha1sum/Cargo.toml b/src/uu/sha1sum/Cargo.toml new file mode 100644 index 00000000000..001bddd69e3 --- /dev/null +++ b/src/uu/sha1sum/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "uu_sha1sum" +description = "sha1sum ~ (uutils) Print or check the SHA1 checksums" +repository = "https://github.com/uutils/coreutils/tree/main/src/uu/sha1sum" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[lib] +path = "src/sha1sum.rs" + +[dependencies] +clap = { workspace = true } +uu_checksum_common = { workspace = true } +uucore = { workspace = true, features = [ + "checksum", + "encoding", + "sum", + "hardware", +] } +fluent = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +tempfile = { workspace = true } +uucore = { workspace = true, features = ["benchmark"] } + +[[bin]] +name = "sha1sum" +path = "src/main.rs" diff --git a/src/uu/sha1sum/LICENSE b/src/uu/sha1sum/LICENSE new file mode 120000 index 00000000000..5853aaea53b --- /dev/null +++ b/src/uu/sha1sum/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/sha1sum/locales/en-US.ftl b/src/uu/sha1sum/locales/en-US.ftl new file mode 100644 index 00000000000..378b8f8d456 --- /dev/null +++ b/src/uu/sha1sum/locales/en-US.ftl @@ -0,0 +1,2 @@ +sha1sum-about = Print or check the SHA1 checksums +sha1sum-usage = sha1sum [OPTIONS] [FILE]... diff --git a/src/uu/sha1sum/locales/fr-FR.ftl b/src/uu/sha1sum/locales/fr-FR.ftl new file mode 100644 index 00000000000..865bd8071b4 --- /dev/null +++ b/src/uu/sha1sum/locales/fr-FR.ftl @@ -0,0 +1,2 @@ +sha1sum-about = Afficher le SHA1 et la taille de chaque fichier +sha1sum-usage = sha1sum [OPTION]... [FICHIER]... diff --git a/src/uu/sha1sum/src/main.rs b/src/uu/sha1sum/src/main.rs new file mode 100644 index 00000000000..18d80cfdeac --- /dev/null +++ b/src/uu/sha1sum/src/main.rs @@ -0,0 +1 @@ +uucore::bin!(uu_sha1sum); diff --git a/src/uu/sha1sum/src/sha1sum.rs b/src/uu/sha1sum/src/sha1sum.rs new file mode 100644 index 00000000000..e715c79661e --- /dev/null +++ b/src/uu/sha1sum/src/sha1sum.rs @@ -0,0 +1 @@ +uu_checksum_common::declare_standalone!("sha1sum", uucore::checksum::AlgoKind::Sha1); diff --git a/src/uu/sha224sum/Cargo.toml b/src/uu/sha224sum/Cargo.toml new file mode 100644 index 00000000000..25086ee4248 --- /dev/null +++ b/src/uu/sha224sum/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "uu_sha224sum" +description = "sha224sum ~ (uutils) Print or check the SHA224 checksums" +repository = "https://github.com/uutils/coreutils/tree/main/src/uu/sha224sum" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[lib] +path = "src/sha224sum.rs" + +[dependencies] +clap = { workspace = true } +uu_checksum_common = { workspace = true } +uucore = { workspace = true, features = [ + "checksum", + "encoding", + "sum", + "hardware", +] } +fluent = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +tempfile = { workspace = true } +uucore = { workspace = true, features = ["benchmark"] } + +[[bin]] +name = "sha224sum" +path = "src/main.rs" diff --git a/src/uu/sha224sum/LICENSE b/src/uu/sha224sum/LICENSE new file mode 120000 index 00000000000..5853aaea53b --- /dev/null +++ b/src/uu/sha224sum/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/sha224sum/locales/en-US.ftl b/src/uu/sha224sum/locales/en-US.ftl new file mode 100644 index 00000000000..00f852b7126 --- /dev/null +++ b/src/uu/sha224sum/locales/en-US.ftl @@ -0,0 +1,2 @@ +sha224sum-about = Print or check the SHA224 checksums +sha224sum-usage = sha224sum [OPTIONS] [FILE]... diff --git a/src/uu/sha224sum/locales/fr-FR.ftl b/src/uu/sha224sum/locales/fr-FR.ftl new file mode 100644 index 00000000000..dbd90e9f398 --- /dev/null +++ b/src/uu/sha224sum/locales/fr-FR.ftl @@ -0,0 +1,2 @@ +sha224sum-about = Afficher le SHA224 et la taille de chaque fichier +sha224sum-usage = sha224sum [OPTION]... [FICHIER]... diff --git a/src/uu/sha224sum/src/main.rs b/src/uu/sha224sum/src/main.rs new file mode 100644 index 00000000000..974671331cd --- /dev/null +++ b/src/uu/sha224sum/src/main.rs @@ -0,0 +1 @@ +uucore::bin!(uu_sha224sum); diff --git a/src/uu/sha224sum/src/sha224sum.rs b/src/uu/sha224sum/src/sha224sum.rs new file mode 100644 index 00000000000..3491046756a --- /dev/null +++ b/src/uu/sha224sum/src/sha224sum.rs @@ -0,0 +1 @@ +uu_checksum_common::declare_standalone!("sha224sum", uucore::checksum::AlgoKind::Sha224); diff --git a/src/uu/sha256sum/Cargo.toml b/src/uu/sha256sum/Cargo.toml new file mode 100644 index 00000000000..2ca6204c041 --- /dev/null +++ b/src/uu/sha256sum/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "uu_sha256sum" +description = "sha256sum ~ (uutils) Print or check the SHA256 checksums" +repository = "https://github.com/uutils/coreutils/tree/main/src/uu/sha256sum" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[lib] +path = "src/sha256sum.rs" + +[dependencies] +clap = { workspace = true } +uu_checksum_common = { workspace = true } +uucore = { workspace = true, features = [ + "checksum", + "encoding", + "sum", + "hardware", +] } +fluent = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +tempfile = { workspace = true } +uucore = { workspace = true, features = ["benchmark"] } + +[[bin]] +name = "sha256sum" +path = "src/main.rs" diff --git a/src/uu/sha256sum/LICENSE b/src/uu/sha256sum/LICENSE new file mode 120000 index 00000000000..5853aaea53b --- /dev/null +++ b/src/uu/sha256sum/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/sha256sum/locales/en-US.ftl b/src/uu/sha256sum/locales/en-US.ftl new file mode 100644 index 00000000000..60a0b4a3f33 --- /dev/null +++ b/src/uu/sha256sum/locales/en-US.ftl @@ -0,0 +1,2 @@ +sha256sum-about = Print or check the SHA256 checksums +sha256sum-usage = sha256sum [OPTIONS] [FILE]... diff --git a/src/uu/sha256sum/locales/fr-FR.ftl b/src/uu/sha256sum/locales/fr-FR.ftl new file mode 100644 index 00000000000..baaa2f83bd7 --- /dev/null +++ b/src/uu/sha256sum/locales/fr-FR.ftl @@ -0,0 +1,2 @@ +sha256sum-about = Afficher le SHA256 et la taille de chaque fichier +sha256sum-usage = sha256sum [OPTION]... [FICHIER]... diff --git a/src/uu/sha256sum/src/main.rs b/src/uu/sha256sum/src/main.rs new file mode 100644 index 00000000000..323cd315df1 --- /dev/null +++ b/src/uu/sha256sum/src/main.rs @@ -0,0 +1 @@ +uucore::bin!(uu_sha256sum); diff --git a/src/uu/sha256sum/src/sha256sum.rs b/src/uu/sha256sum/src/sha256sum.rs new file mode 100644 index 00000000000..ab47a23df44 --- /dev/null +++ b/src/uu/sha256sum/src/sha256sum.rs @@ -0,0 +1 @@ +uu_checksum_common::declare_standalone!("sha256sum", uucore::checksum::AlgoKind::Sha256); diff --git a/src/uu/sha384sum/Cargo.toml b/src/uu/sha384sum/Cargo.toml new file mode 100644 index 00000000000..2fb9ca0375b --- /dev/null +++ b/src/uu/sha384sum/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "uu_sha384sum" +description = "sha384sum ~ (uutils) Print or check the SHA384 checksums" +repository = "https://github.com/uutils/coreutils/tree/main/src/uu/sha384sum" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[lib] +path = "src/sha384sum.rs" + +[dependencies] +clap = { workspace = true } +uu_checksum_common = { workspace = true } +uucore = { workspace = true, features = [ + "checksum", + "encoding", + "sum", + "hardware", +] } +fluent = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +tempfile = { workspace = true } +uucore = { workspace = true, features = ["benchmark"] } + +[[bin]] +name = "sha384sum" +path = "src/main.rs" diff --git a/src/uu/sha384sum/LICENSE b/src/uu/sha384sum/LICENSE new file mode 120000 index 00000000000..5853aaea53b --- /dev/null +++ b/src/uu/sha384sum/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/sha384sum/locales/en-US.ftl b/src/uu/sha384sum/locales/en-US.ftl new file mode 100644 index 00000000000..e10a99c1eb3 --- /dev/null +++ b/src/uu/sha384sum/locales/en-US.ftl @@ -0,0 +1,2 @@ +sha384sum-about = Print or check the SHA384 checksums +sha384sum-usage = sha384sum [OPTIONS] [FILE]... diff --git a/src/uu/sha384sum/locales/fr-FR.ftl b/src/uu/sha384sum/locales/fr-FR.ftl new file mode 100644 index 00000000000..f751315eceb --- /dev/null +++ b/src/uu/sha384sum/locales/fr-FR.ftl @@ -0,0 +1,2 @@ +sha1sum-about = Afficher le SHA384 et la taille de chaque fichier +sha1sum-usage = sha384sum [OPTION]... [FICHIER]... diff --git a/src/uu/sha384sum/src/main.rs b/src/uu/sha384sum/src/main.rs new file mode 100644 index 00000000000..c87f32e286e --- /dev/null +++ b/src/uu/sha384sum/src/main.rs @@ -0,0 +1 @@ +uucore::bin!(uu_sha384sum); diff --git a/src/uu/sha384sum/src/sha384sum.rs b/src/uu/sha384sum/src/sha384sum.rs new file mode 100644 index 00000000000..818478e294d --- /dev/null +++ b/src/uu/sha384sum/src/sha384sum.rs @@ -0,0 +1 @@ +uu_checksum_common::declare_standalone!("sha384sum", uucore::checksum::AlgoKind::Sha384); diff --git a/src/uu/sha512sum/Cargo.toml b/src/uu/sha512sum/Cargo.toml new file mode 100644 index 00000000000..0cea1453b07 --- /dev/null +++ b/src/uu/sha512sum/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "uu_sha512sum" +description = "sha512sum ~ (uutils) Print or check the SHA512 checksums" +repository = "https://github.com/uutils/coreutils/tree/main/src/uu/sha512sum" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[lib] +path = "src/sha512sum.rs" + +[dependencies] +clap = { workspace = true } +uu_checksum_common = { workspace = true } +uucore = { workspace = true, features = [ + "checksum", + "encoding", + "sum", + "hardware", +] } +fluent = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +tempfile = { workspace = true } +uucore = { workspace = true, features = ["benchmark"] } + +[[bin]] +name = "sha512sum" +path = "src/main.rs" diff --git a/src/uu/sha512sum/LICENSE b/src/uu/sha512sum/LICENSE new file mode 120000 index 00000000000..5853aaea53b --- /dev/null +++ b/src/uu/sha512sum/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/sha512sum/locales/en-US.ftl b/src/uu/sha512sum/locales/en-US.ftl new file mode 100644 index 00000000000..395a9007771 --- /dev/null +++ b/src/uu/sha512sum/locales/en-US.ftl @@ -0,0 +1,2 @@ +sha512sum-about = Print or check the SHA512 checksums +sha512sum-usage = sha512sum [OPTIONS] [FILE]... diff --git a/src/uu/sha512sum/locales/fr-FR.ftl b/src/uu/sha512sum/locales/fr-FR.ftl new file mode 100644 index 00000000000..59abcc2f9bc --- /dev/null +++ b/src/uu/sha512sum/locales/fr-FR.ftl @@ -0,0 +1,2 @@ +sha512sum-about = Afficher le SHA512 et la taille de chaque fichier +sha512sum-usage = sha512sum [OPTION]... [FICHIER]... diff --git a/src/uu/sha512sum/src/main.rs b/src/uu/sha512sum/src/main.rs new file mode 100644 index 00000000000..64a6ecea622 --- /dev/null +++ b/src/uu/sha512sum/src/main.rs @@ -0,0 +1 @@ +uucore::bin!(uu_sha512sum); diff --git a/src/uu/sha512sum/src/sha512sum.rs b/src/uu/sha512sum/src/sha512sum.rs new file mode 100644 index 00000000000..125d263f0e2 --- /dev/null +++ b/src/uu/sha512sum/src/sha512sum.rs @@ -0,0 +1 @@ +uu_checksum_common::declare_standalone!("sha512sum", uucore::checksum::AlgoKind::Sha512); diff --git a/src/uu/shuf/Cargo.toml b/src/uu/shuf/Cargo.toml index b67b1d80811..b271d9b9b57 100644 --- a/src/uu/shuf/Cargo.toml +++ b/src/uu/shuf/Cargo.toml @@ -19,8 +19,11 @@ path = "src/shuf.rs" [dependencies] clap = { workspace = true } +itoa = { workspace = true } rand = { workspace = true } +rand_chacha = { workspace = true } rand_core = { workspace = true } +sha3 = { workspace = true } uucore = { workspace = true } fluent = { workspace = true } @@ -34,5 +37,4 @@ harness = false [dev-dependencies] divan = { workspace = true } -tempfile = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } diff --git a/src/uu/shuf/locales/en-US.ftl b/src/uu/shuf/locales/en-US.ftl index 24876e6a37f..de322117983 100644 --- a/src/uu/shuf/locales/en-US.ftl +++ b/src/uu/shuf/locales/en-US.ftl @@ -10,6 +10,7 @@ shuf-help-echo = treat each ARG as an input line shuf-help-input-range = treat each number LO through HI as an input line shuf-help-head-count = output at most COUNT lines shuf-help-output = write result to FILE instead of standard output +shuf-help-random-seed = seed with STRING for reproducible output shuf-help-random-source = get random bytes from FILE shuf-help-repeat = output lines can be repeated shuf-help-zero-terminated = line delimiter is NUL, not newline @@ -19,6 +20,8 @@ shuf-error-unexpected-argument = unexpected argument { $arg } found shuf-error-failed-to-open-for-writing = failed to open { $file } for writing shuf-error-failed-to-open-random-source = failed to open random source { $file } shuf-error-read-error = read error +shuf-error-read-random-bytes = reading random bytes failed +shuf-error-end-of-random-bytes = end of random source shuf-error-no-lines-to-repeat = no lines to repeat shuf-error-start-exceeds-end = start exceeds end shuf-error-missing-dash = missing '-' diff --git a/src/uu/shuf/src/compat_random_source.rs b/src/uu/shuf/src/compat_random_source.rs new file mode 100644 index 00000000000..73a7191be73 --- /dev/null +++ b/src/uu/shuf/src/compat_random_source.rs @@ -0,0 +1,123 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use std::{io::BufRead, ops::RangeInclusive}; + +use uucore::error::{FromIo, UResult, USimpleError}; +use uucore::translate; + +/// A uniform integer generator that tries to exactly match GNU shuf's --random-source. +/// +/// It's not particularly efficient and possibly not quite uniform. It should *only* be +/// used for compatibility with GNU: other modes shouldn't touch this code. +/// +/// All the logic here was black box reverse engineered. It might not match up in all edge +/// cases but it gives identical results on many different large and small inputs. +/// +/// It seems that GNU uses fairly textbook rejection sampling to generate integers, reading +/// one byte at a time until it has enough entropy, and recycling leftover entropy after +/// accepting or rejecting a value. +/// +/// To do your own experiments, start with commands like these: +/// +/// printf '\x01\x02\x03\x04' | shuf -i0-255 -r --random-source=/dev/stdin +/// +/// Then vary the integer range and the input and the input length. It can be useful to +/// see when exactly shuf crashes with an "end of file" error. +/// +/// To spot small inconsistencies it's useful to run: +/// +/// diff -y <(my_shuf ...) <(shuf -i0-{MAX} -r --random-source={INPUT}) | head -n 50 +pub struct RandomSourceAdapter { + reader: R, + state: u64, + entropy: u64, +} + +impl RandomSourceAdapter { + pub fn new(reader: R) -> Self { + Self { + reader, + state: 0, + entropy: 0, + } + } +} + +impl RandomSourceAdapter { + fn generate_at_most(&mut self, at_most: u64) -> UResult { + while self.entropy < at_most { + let buf = self + .reader + .fill_buf() + .map_err_context(|| translate!("shuf-error-read-random-bytes"))?; + let Some(&byte) = buf.first() else { + return Err(USimpleError::new( + 1, + translate!("shuf-error-end-of-random-bytes"), + )); + }; + self.reader.consume(1); + // Is overflow OK here? Won't it cause bias? (Seems to work out...) + self.state = self.state.wrapping_mul(256).wrapping_add(byte as u64); + self.entropy = self.entropy.wrapping_mul(256).wrapping_add(255); + } + + if at_most == u64::MAX { + // at_most + 1 would overflow but this case is easy. + let val = self.state; + self.entropy = 0; + self.state = 0; + return Ok(val); + } + + let num_possibilities = at_most + 1; + + // If the generated number falls within this margin at the upper end of the + // range then we retry to avoid modulo bias. + let margin = ((self.entropy as u128 + 1) % num_possibilities as u128) as u64; + let safe_zone = self.entropy - margin; + + if self.state <= safe_zone { + let val = self.state % num_possibilities; + // Reuse the rest of the state. + self.state /= num_possibilities; + // We need this subtraction, otherwise we consume new input slightly more + // slowly than GNU. Not sure if it checks out mathematically. + self.entropy -= at_most; + self.entropy /= num_possibilities; + Ok(val) + } else { + self.state %= num_possibilities; + self.entropy %= num_possibilities; + // I sure hope the compiler optimizes this tail call. + self.generate_at_most(at_most) + } + } + + pub fn choose_from_range(&mut self, range: RangeInclusive) -> UResult { + let offset = self.generate_at_most(*range.end() - *range.start())?; + Ok(*range.start() + offset) + } + + pub fn choose_from_slice(&mut self, vals: &[T]) -> UResult { + assert!(!vals.is_empty()); + let idx = self.generate_at_most(vals.len() as u64 - 1)? as usize; + Ok(vals[idx]) + } + + pub fn shuffle<'a, T>(&mut self, vals: &'a mut [T], amount: usize) -> UResult<&'a mut [T]> { + // Fisher-Yates shuffle. + // TODO: GNU does something different if amount <= vals.len() and the input is stdin. + // The order changes completely and depends on --head-count. + // No clue what they might do differently and why. + let amount = amount.min(vals.len()); + for idx in 0..amount { + let other_idx = self.generate_at_most((vals.len() - idx - 1) as u64)? as usize + idx; + vals.swap(idx, other_idx); + } + Ok(&mut vals[..amount]) + } +} diff --git a/src/uu/shuf/src/nonrepeating_iterator.rs b/src/uu/shuf/src/nonrepeating_iterator.rs new file mode 100644 index 00000000000..d05844ba9d1 --- /dev/null +++ b/src/uu/shuf/src/nonrepeating_iterator.rs @@ -0,0 +1,111 @@ +use std::collections::HashMap; +use std::ops::RangeInclusive; + +use uucore::error::UResult; + +use crate::WrappedRng; + +/// An iterator that samples from an integer range without repetition. +/// +/// This is based on Fisher-Yates, and it's required for backward compatibility +/// that it behaves exactly like Fisher-Yates if --random-source or --random-seed +/// is used. But we have a few tricks: +/// +/// - In the beginning we use a hash table instead of an array. This way we lazily +/// keep track of swaps without allocating the entire range upfront. +/// +/// - When the hash table starts to get big relative to the remaining items +/// we switch over to an array. +/// +/// - We store the array backwards so that we can shrink it as we go and free excess +/// memory every now and then. +/// +/// Both the hash table and the array give the same output. +/// +/// There's room for optimization: +/// +/// - Switching over from the hash table to the array is costly. If we happen to know +/// (through --head-count) that only few draws remain then it would be better not +/// to switch. +/// +/// - If the entire range gets used then we might as well allocate an array to start +/// with. But if the user e.g. pipes through `head` rather than using --head-count +/// we can't know whether that's the case, so there's a tradeoff. +/// +/// GNU decides the other way: --head-count is noticeably faster than | head. +pub(crate) struct NonrepeatingIterator<'a> { + rng: &'a mut WrappedRng, + values: Values, +} + +enum Values { + Full(Vec), + Sparse(RangeInclusive, HashMap), +} + +impl<'a> NonrepeatingIterator<'a> { + pub(crate) fn new(range: RangeInclusive, rng: &'a mut WrappedRng) -> Self { + let values = Values::Sparse(range, HashMap::default()); + NonrepeatingIterator { rng, values } + } + + fn produce(&mut self) -> UResult { + match &mut self.values { + Values::Full(items) => { + let this_idx = items.len() - 1; + + let other_idx = self.rng.choose_from_range(0..=items.len() as u64 - 1)? as usize; + // Flip the index to pretend we're going left-to-right + let other_idx = items.len() - other_idx - 1; + + items.swap(this_idx, other_idx); + + let val = items.pop().unwrap(); + if items.len().is_power_of_two() && items.len() >= 512 { + items.shrink_to_fit(); + } + Ok(val) + } + Values::Sparse(range, items) => { + let this_idx = *range.start(); + let this_val = items.remove(&this_idx).unwrap_or(this_idx); + + let other_idx = self.rng.choose_from_range(range.clone())?; + + let val = if this_idx == other_idx { + this_val + } else { + items.insert(other_idx, this_val).unwrap_or(other_idx) + }; + *range = *range.start() + 1..=*range.end(); + + Ok(val) + } + } + } +} + +impl Iterator for NonrepeatingIterator<'_> { + type Item = UResult; + + fn next(&mut self) -> Option { + match &self.values { + Values::Full(items) if items.is_empty() => return None, + Values::Full(_) => (), + Values::Sparse(range, _) if range.is_empty() => return None, + Values::Sparse(range, items) => { + let range_len = range.size_hint().0 as u64; + if items.len() as u64 >= range_len / 8 { + self.values = Values::Full(hashmap_to_vec(range.clone(), items)); + } + } + } + + Some(self.produce()) + } +} + +fn hashmap_to_vec(range: RangeInclusive, map: &HashMap) -> Vec { + let lookup = |idx| *map.get(&idx).unwrap_or(&idx); + range.rev().map(lookup).collect() +} diff --git a/src/uu/shuf/src/rand_read_adapter.rs b/src/uu/shuf/src/rand_read_adapter.rs deleted file mode 100644 index 3f504c03d2b..00000000000 --- a/src/uu/shuf/src/rand_read_adapter.rs +++ /dev/null @@ -1,142 +0,0 @@ -// This file is part of the uutils coreutils package. -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. -// Copyright 2018 Developers of the Rand project. -// Copyright 2013 The Rust Project Developers. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! A wrapper around any Read to treat it as an RNG. - -use std::fmt; -use std::io::Read; - -use rand_core::{RngCore, impls}; - -/// An RNG that reads random bytes straight from any type supporting -/// [`std::io::Read`], for example files. -/// -/// This will work best with an infinite reader, but that is not required. -/// -/// This can be used with `/dev/urandom` on Unix but it is recommended to use -/// [`OsRng`] instead. -/// -/// # Panics -/// -/// `ReadRng` uses [`std::io::Read::read_exact`], which retries on interrupts. -/// All other errors from the underlying reader, including when it does not -/// have enough data, will only be reported through `try_fill_bytes`. -/// The other [`RngCore`] methods will panic in case of an error. -/// -/// [`OsRng`]: rand::rngs::OsRng -#[derive(Debug)] -pub struct ReadRng { - reader: R, -} - -impl ReadRng { - /// Create a new `ReadRng` from a `Read`. - pub fn new(r: R) -> Self { - Self { reader: r } - } - - fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), ReadError> { - if dest.is_empty() { - return Ok(()); - } - // Use `std::io::read_exact`, which retries on `ErrorKind::Interrupted`. - self.reader.read_exact(dest).map_err(ReadError) - } -} - -impl RngCore for ReadRng { - fn next_u32(&mut self) -> u32 { - impls::next_u32_via_fill(self) - } - - fn next_u64(&mut self) -> u64 { - impls::next_u64_via_fill(self) - } - - fn fill_bytes(&mut self, dest: &mut [u8]) { - self.try_fill_bytes(dest).unwrap_or_else(|err| { - panic!("reading random bytes from Read implementation failed; error: {err}"); - }); - } -} - -/// `ReadRng` error type -#[derive(Debug)] -pub struct ReadError(std::io::Error); - -impl fmt::Display for ReadError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "ReadError: {}", self.0) - } -} - -impl std::error::Error for ReadError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - Some(&self.0) - } -} - -#[cfg(test)] -mod test { - use std::println; - - use super::ReadRng; - use rand::RngCore; - - #[test] - fn test_reader_rng_u64() { - // transmute from the target to avoid endianness concerns. - #[rustfmt::skip] - let v = [0u8, 0, 0, 0, 0, 0, 0, 1, - 0, 4, 0, 0, 3, 0, 0, 2, - 5, 0, 0, 0, 0, 0, 0, 0]; - let mut rng = ReadRng::new(&v[..]); - - assert_eq!(rng.next_u64(), 1 << 56); - assert_eq!(rng.next_u64(), (2 << 56) + (3 << 32) + (4 << 8)); - assert_eq!(rng.next_u64(), 5); - } - - #[test] - fn test_reader_rng_u32() { - let v = [0u8, 0, 0, 1, 0, 0, 2, 0, 3, 0, 0, 0]; - let mut rng = ReadRng::new(&v[..]); - - assert_eq!(rng.next_u32(), 1 << 24); - assert_eq!(rng.next_u32(), 2 << 16); - assert_eq!(rng.next_u32(), 3); - } - - #[test] - fn test_reader_rng_fill_bytes() { - let v = [1u8, 2, 3, 4, 5, 6, 7, 8]; - let mut w = [0u8; 8]; - - let mut rng = ReadRng::new(&v[..]); - rng.fill_bytes(&mut w); - - assert_eq!(v, w); - } - - #[test] - fn test_reader_rng_insufficient_bytes() { - let v = [1u8, 2, 3, 4, 5, 6, 7, 8]; - let mut w = [0u8; 9]; - - let mut rng = ReadRng::new(&v[..]); - - let result = rng.try_fill_bytes(&mut w); - assert!(result.is_err()); - println!("Error: {}", result.unwrap_err()); - } -} diff --git a/src/uu/shuf/src/random_seed.rs b/src/uu/shuf/src/random_seed.rs new file mode 100644 index 00000000000..dbc6c728c19 --- /dev/null +++ b/src/uu/shuf/src/random_seed.rs @@ -0,0 +1,115 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use std::ops::RangeInclusive; + +use rand::{RngCore as _, SeedableRng as _}; +use rand_chacha::ChaCha12Rng; +use sha3::{Digest as _, Sha3_256}; + +/// Reproducible seeded random number generation. +/// +/// The behavior should stay the same between releases, so don't change it without +/// a very good reason. +/// +/// # How it works +/// +/// - Take a Unicode string as the seed. +/// +/// - Encode this seed as UTF-8. +/// +/// - Take the SHA3-256 hash of the encoded seed. +/// +/// - Use that hash as the input for a [`rand_chacha`] ChaCha12 RNG. +/// (We don't touch the nonce, so that's probably zero.) +/// +/// - Take 64-bit samples from the RNG. +/// +/// - Use Lemire's method to generate uniformly distributed integers and: +/// +/// - With --repeat, use these to pick elements from ranges. +/// +/// - Without --repeat, use these to do left-to-right modern Fisher-Yates. +/// +/// # Why it works like this +/// +/// - Unicode string: Greatest common denominator between platforms. Windows doesn't +/// let you pass raw bytes as a CLI argument and that would be bad practice anyway. +/// A decimal or hex number would work but this is much more flexible without being +/// unmanageable. +/// +/// (Footgun: if the user passes a filename we won't read from the file but the +/// command will run anyway.) +/// +/// - UTF-8: That's what Rust likes and it's the least unreasonable Unicode encoding. +/// +/// - SHA3-256: We want to make good use of the entire user input and SHA-3 is +/// state of the art. ChaCha12 takes a 256-bit seed. +/// +/// - ChaCha12: [`rand`]'s default rng as of writing. Seems state of the art. +/// +/// - 64-bit samples: We could often get away with 32-bit samples but let's keep things +/// simple and only use one width. (There doesn't seem to be much of a performance hit.) +/// +/// - Lemire, Fisher-Yates: These are very easy to implement and maintain ourselves. +/// `rand` provides fancier implementations but only promises reproducibility within +/// patch releases: +/// +/// Strictly speaking even `ChaCha12` is subject to breakage. But since it's a very +/// specific algorithm I assume it's safe in practice. +pub struct SeededRng(Box); + +impl SeededRng { + pub fn new(seed: &str) -> Self { + let mut hasher = Sha3_256::new(); + hasher.update(seed.as_bytes()); + let seed = hasher.finalize(); + let seed = seed.as_slice().try_into().unwrap(); + Self(Box::new(rand_chacha::ChaCha12Rng::from_seed(seed))) + } + + #[allow(clippy::many_single_char_names)] // use original lemire names for easy comparison + fn generate_at_most(&mut self, at_most: u64) -> u64 { + if at_most == u64::MAX { + return self.0.next_u64(); + } + + // https://lemire.me/blog/2019/06/06/nearly-divisionless-random-integer-generation-on-various-systems/ + let s: u64 = at_most + 1; + let mut x: u64 = self.0.next_u64(); + let mut m: u128 = u128::from(x) * u128::from(s); + let mut l: u64 = m as u64; + if l < s { + let t: u64 = s.wrapping_neg() % s; + while l < t { + x = self.0.next_u64(); + m = u128::from(x) * u128::from(s); + l = m as u64; + } + } + (m >> 64) as u64 + } + + pub fn choose_from_range(&mut self, range: RangeInclusive) -> u64 { + let offset = self.generate_at_most(*range.end() - *range.start()); + *range.start() + offset + } + + pub fn choose_from_slice(&mut self, vals: &[T]) -> T { + assert!(!vals.is_empty()); + let idx = self.generate_at_most(vals.len() as u64 - 1) as usize; + vals[idx] + } + + pub fn shuffle<'a, T>(&mut self, vals: &'a mut [T], amount: usize) -> &'a mut [T] { + // Fisher-Yates shuffle. + let amount = amount.min(vals.len()); + for idx in 0..amount { + let other_idx = self.generate_at_most((vals.len() - idx - 1) as u64) as usize + idx; + vals.swap(idx, other_idx); + } + &mut vals[..amount] + } +} diff --git a/src/uu/shuf/src/shuf.rs b/src/uu/shuf/src/shuf.rs index 4fd5ca85a0f..970a623e254 100644 --- a/src/uu/shuf/src/shuf.rs +++ b/src/uu/shuf/src/shuf.rs @@ -5,45 +5,62 @@ // spell-checker:ignore (ToDO) cmdline evec nonrepeating seps shufable rvec fdata -use clap::builder::ValueParser; -use clap::{Arg, ArgAction, Command}; -use rand::prelude::SliceRandom; -use rand::seq::IndexedRandom; -use rand::{Rng, RngCore}; -use std::collections::HashSet; use std::ffi::{OsStr, OsString}; use std::fs::File; -use std::io::{BufWriter, Error, Read, Write, stdin, stdout}; +use std::io::{BufReader, BufWriter, Error, Read, Write, stdin, stdout}; use std::ops::RangeInclusive; use std::path::{Path, PathBuf}; use std::str::FromStr; + +use clap::{Arg, ArgAction, Command, builder::ValueParser}; +use rand::rngs::ThreadRng; +use rand::{ + Rng, + seq::{IndexedRandom, SliceRandom}, +}; + use uucore::display::{OsWrite, Quotable}; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::format_usage; use uucore::translate; -mod rand_read_adapter; +mod compat_random_source; +mod nonrepeating_iterator; +mod random_seed; + +use compat_random_source::RandomSourceAdapter; +use nonrepeating_iterator::NonrepeatingIterator; +use random_seed::SeededRng; enum Mode { Default(PathBuf), Echo(Vec), - InputRange(RangeInclusive), + InputRange(RangeInclusive), } +const BUF_SIZE: usize = 64 * 1024; + struct Options { - head_count: usize, + head_count: u64, output: Option, - random_source: Option, + random_source: RandomSource, repeat: bool, sep: u8, } +enum RandomSource { + None, + Seed(String), + File(PathBuf), +} + mod options { pub static ECHO: &str = "echo"; pub static INPUT_RANGE: &str = "input-range"; pub static HEAD_COUNT: &str = "head-count"; pub static OUTPUT: &str = "output"; pub static RANDOM_SOURCE: &str = "random-source"; + pub static RANDOM_SEED: &str = "random-seed"; pub static REPEAT: &str = "repeat"; pub static ZERO_TERMINATED: &str = "zero-terminated"; pub static FILE_OR_ARGS: &str = "file-or-args"; @@ -77,19 +94,27 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { Mode::Default(file.into()) }; + let random_source = if let Some(filename) = matches.get_one(options::RANDOM_SOURCE).cloned() { + RandomSource::File(filename) + } else if let Some(seed) = matches.get_one(options::RANDOM_SEED).cloned() { + RandomSource::Seed(seed) + } else { + RandomSource::None + }; + let options = Options { // GNU shuf takes the lowest value passed, so we imitate that. // It's probably a bug or an implementation artifact though. // Busybox takes the final value which is more typical: later // options override earlier options. head_count: matches - .get_many::(options::HEAD_COUNT) + .get_many::(options::HEAD_COUNT) .unwrap_or_default() .copied() .min() - .unwrap_or(usize::MAX), + .unwrap_or(u64::MAX), output: matches.get_one(options::OUTPUT).cloned(), - random_source: matches.get_one(options::RANDOM_SOURCE).cloned(), + random_source, repeat: matches.get_flag(options::REPEAT), sep: if matches.get_flag(options::ZERO_TERMINATED) { b'\0' @@ -98,15 +123,18 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { }, }; - let mut output = BufWriter::new(match options.output { - None => Box::new(stdout()) as Box, - Some(ref s) => { - let file = File::create(s).map_err_context( - || translate!("shuf-error-failed-to-open-for-writing", "file" => s.quote()), - )?; - Box::new(file) as Box - } - }); + let mut output = BufWriter::with_capacity( + BUF_SIZE, + match options.output { + None => Box::new(stdout()) as Box, + Some(ref s) => { + let file = File::create(s).map_err_context( + || translate!("shuf-error-failed-to-open-for-writing", "file" => s.quote()), + )?; + Box::new(file) as Box + } + }, + ); if options.head_count == 0 { // In this case we do want to touch the output file but we can quit immediately. @@ -114,13 +142,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } let mut rng = match options.random_source { - Some(ref r) => { + RandomSource::None => WrappedRng::Default(rand::rng()), + RandomSource::Seed(ref seed) => WrappedRng::Seed(SeededRng::new(seed)), + RandomSource::File(ref r) => { let file = File::open(r).map_err_context( || translate!("shuf-error-failed-to-open-random-source", "file" => r.quote()), )?; - WrappedRng::RngFile(rand_read_adapter::ReadRng::new(file)) + let file = BufReader::new(file); + WrappedRng::File(compat_random_source::RandomSourceAdapter::new(file)) } - None => WrappedRng::RngDefault(rand::rng()), }; match mode { @@ -173,7 +203,7 @@ pub fn uu_app() -> Command { .value_name("COUNT") .action(ArgAction::Append) .help(translate!("shuf-help-head-count")) - .value_parser(usize::from_str), + .value_parser(u64::from_str), ) .arg( Arg::new(options::OUTPUT) @@ -184,6 +214,15 @@ pub fn uu_app() -> Command { .value_parser(ValueParser::path_buf()) .value_hint(clap::ValueHint::FilePath), ) + .arg( + Arg::new(options::RANDOM_SEED) + .long(options::RANDOM_SEED) + .value_name("STRING") + .help(translate!("shuf-help-random-seed")) + .value_parser(ValueParser::string()) + .value_hint(clap::ValueHint::Other) + .conflicts_with(options::RANDOM_SOURCE), + ) .arg( Arg::new(options::RANDOM_SOURCE) .long(options::RANDOM_SOURCE) @@ -243,12 +282,15 @@ fn split_seps(data: &[u8], sep: u8) -> Vec<&[u8]> { trait Shufable { type Item: Writable; fn is_empty(&self) -> bool; - fn choose(&self, rng: &mut WrappedRng) -> Self::Item; + fn choose(&self, rng: &mut WrappedRng) -> UResult; + // In some modes we shuffle ahead of time and in some as we generate + // so we unfortunately need to double-wrap UResult. + // But it's monomorphized so the optimizer will hopefully Take Care Of Itâ„¢. fn partial_shuffle<'b>( &'b mut self, rng: &'b mut WrappedRng, - amount: usize, - ) -> impl Iterator; + amount: u64, + ) -> UResult>>; } impl<'a> Shufable for Vec<&'a [u8]> { @@ -258,20 +300,22 @@ impl<'a> Shufable for Vec<&'a [u8]> { (**self).is_empty() } - fn choose(&self, rng: &mut WrappedRng) -> Self::Item { - // Note: "copied()" only copies the reference, not the entire [u8]. - // Returns None if the slice is empty. We checked this before, so - // this is safe. - (**self).choose(rng).unwrap() + fn choose(&self, rng: &mut WrappedRng) -> UResult { + rng.choose(self) } fn partial_shuffle<'b>( &'b mut self, rng: &'b mut WrappedRng, - amount: usize, - ) -> impl Iterator { - // Note: "copied()" only copies the reference, not the entire [u8]. - (**self).partial_shuffle(rng, amount).0.iter().copied() + amount: u64, + ) -> UResult>> { + // On 32-bit platforms it's possible that amount > usize::MAX. + // We saturate as usize::MAX since all of our shuffling modes require storing + // elements in memory so more than usize::MAX elements won't fit anyway. + // (With --repeat an output larger than usize::MAX is possible. But --repeat + // uses `choose()`.) + let amount = usize::try_from(amount).unwrap_or(usize::MAX); + Ok(rng.shuffle(self, amount)?.iter().copied().map(Ok)) } } @@ -282,128 +326,41 @@ impl<'a> Shufable for Vec<&'a OsStr> { (**self).is_empty() } - fn choose(&self, rng: &mut WrappedRng) -> Self::Item { - (**self).choose(rng).unwrap() + fn choose(&self, rng: &mut WrappedRng) -> UResult { + rng.choose(self) } fn partial_shuffle<'b>( &'b mut self, rng: &'b mut WrappedRng, - amount: usize, - ) -> impl Iterator { - (**self).partial_shuffle(rng, amount).0.iter().copied() + amount: u64, + ) -> UResult>> { + let amount = usize::try_from(amount).unwrap_or(usize::MAX); + Ok(rng.shuffle(self, amount)?.iter().copied().map(Ok)) } } -impl Shufable for RangeInclusive { - type Item = usize; +impl Shufable for RangeInclusive { + type Item = u64; fn is_empty(&self) -> bool { self.is_empty() } - fn choose(&self, rng: &mut WrappedRng) -> usize { - rng.random_range(self.clone()) + fn choose(&self, rng: &mut WrappedRng) -> UResult { + rng.choose_from_range(self.clone()) } fn partial_shuffle<'b>( &'b mut self, rng: &'b mut WrappedRng, - amount: usize, - ) -> impl Iterator { - NonrepeatingIterator::new(self.clone(), rng, amount) + amount: u64, + ) -> UResult>> { + let amount = usize::try_from(amount).unwrap_or(usize::MAX); + Ok(NonrepeatingIterator::new(self.clone(), rng).take(amount)) } } -enum NumberSet { - AlreadyListed(HashSet), - Remaining(Vec), -} - -struct NonrepeatingIterator<'a> { - range: RangeInclusive, - rng: &'a mut WrappedRng, - remaining_count: usize, - buf: NumberSet, -} - -impl<'a> NonrepeatingIterator<'a> { - fn new(range: RangeInclusive, rng: &'a mut WrappedRng, amount: usize) -> Self { - let capped_amount = if range.start() > range.end() { - 0 - } else if range == (0..=usize::MAX) { - amount - } else { - amount.min(range.end() - range.start() + 1) - }; - NonrepeatingIterator { - range, - rng, - remaining_count: capped_amount, - buf: NumberSet::AlreadyListed(HashSet::default()), - } - } - - fn produce(&mut self) -> usize { - debug_assert!(self.range.start() <= self.range.end()); - match &mut self.buf { - NumberSet::AlreadyListed(already_listed) => { - let chosen = loop { - let guess = self.rng.random_range(self.range.clone()); - let newly_inserted = already_listed.insert(guess); - if newly_inserted { - break guess; - } - }; - // Once a significant fraction of the interval has already been enumerated, - // the number of attempts to find a number that hasn't been chosen yet increases. - // Therefore, we need to switch at some point from "set of already returned values" to "list of remaining values". - let range_size = (self.range.end() - self.range.start()).saturating_add(1); - if number_set_should_list_remaining(already_listed.len(), range_size) { - let mut remaining = self - .range - .clone() - .filter(|n| !already_listed.contains(n)) - .collect::>(); - assert!(remaining.len() >= self.remaining_count); - remaining.partial_shuffle(&mut self.rng, self.remaining_count); - remaining.truncate(self.remaining_count); - self.buf = NumberSet::Remaining(remaining); - } - chosen - } - NumberSet::Remaining(remaining_numbers) => { - debug_assert!(!remaining_numbers.is_empty()); - // We only enter produce() when there is at least one actual element remaining, so popping must always return an element. - remaining_numbers.pop().unwrap() - } - } - } -} - -impl Iterator for NonrepeatingIterator<'_> { - type Item = usize; - - fn next(&mut self) -> Option { - if self.range.is_empty() || self.remaining_count == 0 { - return None; - } - self.remaining_count -= 1; - Some(self.produce()) - } -} - -// This could be a method, but it is much easier to test as a stand-alone function. -fn number_set_should_list_remaining(listed_count: usize, range_size: usize) -> bool { - // Arbitrarily determine the switchover point to be around 25%. This is because: - // - HashSet has a large space overhead for the hash table load factor. - // - This means that somewhere between 25-40%, the memory required for a "positive" HashSet and a "negative" Vec should be the same. - // - HashSet has a small but non-negligible overhead for each lookup, so we have a slight preference for Vec anyway. - // - At 25%, on average 1.33 attempts are needed to find a number that hasn't been taken yet. - // - Finally, "24%" is computationally the simplest: - listed_count >= range_size / 4 -} - trait Writable { fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error>; } @@ -420,39 +377,32 @@ impl Writable for &OsStr { } } -impl Writable for usize { +impl Writable for u64 { + #[inline] fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error> { - let mut n = *self; - - // Handle the zero case explicitly - if n == 0 { - return output.write_all(b"0"); - } - - // Maximum number of digits for u64 is 20 (18446744073709551615) - let mut buf = [0u8; 20]; - let mut i = 20; - - // Write digits from right to left - while n > 0 { - i -= 1; - buf[i] = b'0' + (n % 10) as u8; - n /= 10; - } - - // Write the relevant part of the buffer to output - output.write_all(&buf[i..]) + // The itoa crate is surprisingly much more efficient than a formatted write. + // It speeds up `shuf -r -n1000000 -i1-1024` by 1.8×. + let mut buf = itoa::Buffer::new(); + output.write_all(buf.format(*self).as_bytes()) } } +#[cold] +#[inline(never)] +fn handle_write_error(e: std::io::Error) -> Box { + use uucore::error::FromIo; + let ctx = translate!("shuf-error-write-failed"); + e.map_err_context(move || ctx) +} + +#[inline(never)] fn shuf_exec( input: &mut impl Shufable, opts: &Options, rng: &mut WrappedRng, output: &mut BufWriter>, ) -> UResult<()> { - let ctx = || translate!("shuf-error-write-failed"); - + let sep = [opts.sep]; if opts.repeat { if input.is_empty() { return Err(USimpleError::new( @@ -461,26 +411,28 @@ fn shuf_exec( )); } for _ in 0..opts.head_count { - let r = input.choose(rng); - - r.write_all_to(output).map_err_context(ctx)?; - output.write_all(&[opts.sep]).map_err_context(ctx)?; + let r = input.choose(rng)?; + r.write_all_to(output).map_err(handle_write_error)?; + output.write_all(&sep).map_err(handle_write_error)?; } } else { - let shuffled = input.partial_shuffle(rng, opts.head_count); + let shuffled = input.partial_shuffle(rng, opts.head_count)?; + for r in shuffled { - r.write_all_to(output).map_err_context(ctx)?; - output.write_all(&[opts.sep]).map_err_context(ctx)?; + let r = r?; + r.write_all_to(output).map_err(handle_write_error)?; + output.write_all(&sep).map_err(handle_write_error)?; } } + output.flush().map_err(handle_write_error)?; Ok(()) } -fn parse_range(input_range: &str) -> Result, String> { +fn parse_range(input_range: &str) -> Result, String> { if let Some((from, to)) = input_range.split_once('-') { - let begin = from.parse::().map_err(|e| e.to_string())?; - let end = to.parse::().map_err(|e| e.to_string())?; + let begin = from.parse::().map_err(|e| e.to_string())?; + let end = to.parse::().map_err(|e| e.to_string())?; if begin <= end || begin == end + 1 { Ok(begin..=end) } else { @@ -492,29 +444,33 @@ fn parse_range(input_range: &str) -> Result, String> { } enum WrappedRng { - RngFile(rand_read_adapter::ReadRng), - RngDefault(rand::rngs::ThreadRng), + Default(ThreadRng), + Seed(SeededRng), + File(RandomSourceAdapter>), } -impl RngCore for WrappedRng { - fn next_u32(&mut self) -> u32 { +impl WrappedRng { + fn choose(&mut self, vals: &[T]) -> UResult { match self { - Self::RngFile(r) => r.next_u32(), - Self::RngDefault(r) => r.next_u32(), + Self::Default(rng) => Ok(*vals.choose(rng).unwrap()), + Self::Seed(rng) => Ok(rng.choose_from_slice(vals)), + Self::File(rng) => rng.choose_from_slice(vals), } } - fn next_u64(&mut self) -> u64 { + fn shuffle<'a, T>(&mut self, vals: &'a mut [T], amount: usize) -> UResult<&'a mut [T]> { match self { - Self::RngFile(r) => r.next_u64(), - Self::RngDefault(r) => r.next_u64(), + Self::Default(rng) => Ok(vals.partial_shuffle(rng, amount).0), + Self::Seed(rng) => Ok(rng.shuffle(vals, amount)), + Self::File(rng) => rng.shuffle(vals, amount), } } - fn fill_bytes(&mut self, dest: &mut [u8]) { + fn choose_from_range(&mut self, range: RangeInclusive) -> UResult { match self { - Self::RngFile(r) => r.fill_bytes(dest), - Self::RngDefault(r) => r.fill_bytes(dest), + Self::Default(rng) => Ok(rng.random_range(range)), + Self::Seed(rng) => Ok(rng.choose_from_range(range)), + Self::File(rng) => rng.choose_from_range(range), } } } @@ -543,85 +499,3 @@ mod test_split_seps { assert_eq!(split_seps(b"a\nb\nc", b'\n'), &[b"a", b"b", b"c"]); } } - -#[cfg(test)] -// Since the computed value is a bool, it is more readable to write the expected value out: -#[allow(clippy::bool_assert_comparison)] -mod test_number_set_decision { - use super::number_set_should_list_remaining; - - #[test] - fn test_stay_positive_large_remaining_first() { - assert_eq!(false, number_set_should_list_remaining(0, usize::MAX)); - } - - #[test] - fn test_stay_positive_large_remaining_second() { - assert_eq!(false, number_set_should_list_remaining(1, usize::MAX)); - } - - #[test] - fn test_stay_positive_large_remaining_tenth() { - assert_eq!(false, number_set_should_list_remaining(9, usize::MAX)); - } - - #[test] - fn test_stay_positive_smallish_range_first() { - assert_eq!(false, number_set_should_list_remaining(0, 12345)); - } - - #[test] - fn test_stay_positive_smallish_range_second() { - assert_eq!(false, number_set_should_list_remaining(1, 12345)); - } - - #[test] - fn test_stay_positive_smallish_range_tenth() { - assert_eq!(false, number_set_should_list_remaining(9, 12345)); - } - - #[test] - fn test_stay_positive_small_range_not_too_early() { - assert_eq!(false, number_set_should_list_remaining(1, 10)); - } - - // Don't want to test close to the border, in case we decide to change the threshold. - // However, at 50% coverage, we absolutely should switch: - #[test] - fn test_switch_half() { - assert_eq!(true, number_set_should_list_remaining(1234, 2468)); - } - - // Ensure that the decision is monotonous: - #[test] - fn test_switch_late1() { - assert_eq!(true, number_set_should_list_remaining(12340, 12345)); - } - - #[test] - fn test_switch_late2() { - assert_eq!(true, number_set_should_list_remaining(12344, 12345)); - } - - // Ensure that we are overflow-free: - #[test] - fn test_no_crash_exceed_max_size1() { - assert_eq!(false, number_set_should_list_remaining(12345, usize::MAX)); - } - - #[test] - fn test_no_crash_exceed_max_size2() { - assert_eq!( - true, - number_set_should_list_remaining(usize::MAX - 1, usize::MAX) - ); - } - - #[test] - fn test_no_crash_exceed_max_size3() { - assert_eq!( - true, - number_set_should_list_remaining(usize::MAX, usize::MAX) - ); - } -} diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index ad1dcc11872..e487a1bfe49 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -20,6 +20,7 @@ workspace = true path = "src/sort.rs" [features] +default = ["i18n-collator"] i18n-collator = ["uucore/i18n-collator"] [dependencies] @@ -35,7 +36,6 @@ rayon = { workspace = true } self_cell = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } -unicode-width = { workspace = true } uucore = { workspace = true, features = [ "fs", "parser-size", diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 8c27910dce8..ac21c395a51 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -47,7 +47,6 @@ use uucore::display::Quotable; use uucore::error::{FromIo, strip_errno}; use uucore::error::{UError, UResult, USimpleError, UUsageError}; use uucore::extendedbigdecimal::ExtendedBigDecimal; -use uucore::format_usage; #[cfg(feature = "i18n-collator")] use uucore::i18n::collator::locale_cmp; use uucore::i18n::decimal::locale_decimal_separator; @@ -59,6 +58,7 @@ use uucore::posix::{MODERN, TRADITIONAL}; use uucore::show_error; use uucore::translate; use uucore::version_cmp::version_cmp; +use uucore::{format_usage, i18n}; use crate::buffer_hint::automatic_buffer_size; use crate::tmp_dir::TmpDirWrapper; @@ -1086,11 +1086,22 @@ impl FieldSelector { }; let mut range_str = &line[self.get_range(line, tokens)]; if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric { + // Get the thousands separator from the locale, handling cases where the separator is empty or multi-character + let locale_thousands_separator = i18n::decimal::locale_grouping_separator().as_bytes(); + + // Upstream GNU coreutils ignore multibyte thousands separators + // (FIXME in C source). We keep the same single-byte behavior. + let thousands_separator = match locale_thousands_separator { + [b] => Some(*b), + _ => None, + }; + // Parse NumInfo for this number. let (info, num_range) = NumInfo::parse( range_str, &NumInfoParseSettings { accept_si_units: self.settings.mode == SortMode::HumanNumeric, + thousands_separator, ..Default::default() }, ); @@ -1846,11 +1857,12 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), processed_args, 2)?; // Prevent -o/--output to be specified multiple times - if matches - .get_occurrences::(options::OUTPUT) - .is_some_and(|out| out.len() > 1) - { - return Err(SortError::MultipleOutputFiles.into()); + if let Some(mut outputs) = matches.get_many::(options::OUTPUT) { + if let Some(first) = outputs.next() { + if outputs.any(|out| out != first) { + return Err(SortError::MultipleOutputFiles.into()); + } + } } settings.debug = matches.get_flag(options::DEBUG); @@ -2616,17 +2628,17 @@ fn compare_by<'a>( } /// Compare two byte slices in ASCII case-insensitive order without allocating. -/// We lower each byte on the fly so that binary input (including `NUL`) stays +/// We upper each byte on the fly so that binary input (including `NUL`) stays /// untouched and we avoid locale-sensitive routines such as `strcasecmp`. fn ascii_case_insensitive_cmp(a: &[u8], b: &[u8]) -> Ordering { #[inline] - fn lower(byte: u8) -> u8 { - byte.to_ascii_lowercase() + fn fold(byte: u8) -> u8 { + byte.to_ascii_uppercase() } for (lhs, rhs) in a.iter().copied().zip(b.iter().copied()) { - let l = lower(lhs); - let r = lower(rhs); + let l = fold(lhs); + let r = fold(rhs); if l != r { return l.cmp(&r); } diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 6f290a7d5e4..fcab096e2e0 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -54,7 +54,16 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?; match Settings::from(&matches, obs_lines.as_deref()) { - Ok(settings) => split(&settings), + Ok(settings) => { + // When using --filter, we write to a child process's stdin which may + // close early. Disable SIGPIPE so we get EPIPE errors instead of + // being terminated, allowing graceful handling of broken pipes. + #[cfg(unix)] + if settings.filter.is_some() { + let _ = uucore::signals::disable_pipe_errors(); + } + split(&settings) + } Err(e) if e.requires_usage() => Err(UUsageError::new(1, format!("{e}"))), Err(e) => Err(USimpleError::new(1, format!("{e}"))), } @@ -1019,14 +1028,16 @@ impl ManageOutFiles for OutFiles { // Could have hit system limit for open files. // Try to close one previously instantiated writer first for (i, out_file) in self.iter_mut().enumerate() { - if i != idx && out_file.maybe_writer.is_some() { - out_file.maybe_writer.as_mut().unwrap().flush()?; - out_file.maybe_writer = None; - out_file.is_new = false; - count += 1; - - // And then try to instantiate the writer again - continue 'loop1; + if i != idx { + if let Some(writer) = out_file.maybe_writer.as_mut() { + writer.flush()?; + out_file.maybe_writer = None; + out_file.is_new = false; + count += 1; + + // And then try to instantiate the writer again + continue 'loop1; + } } } diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index a7a876b0826..24982e6a4b0 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -1044,7 +1044,10 @@ impl Stater { 'B' => OutputType::Unsigned(512), // SELinux security context string 'C' => { - #[cfg(all(feature = "selinux", target_os = "linux"))] + #[cfg(all( + feature = "selinux", + any(target_os = "linux", target_os = "android") + ))] { if uucore::selinux::is_selinux_enabled() { match uucore::selinux::get_selinux_security_context( @@ -1060,7 +1063,10 @@ impl Stater { OutputType::Str(translate!("stat-selinux-unsupported-system")) } } - #[cfg(not(all(feature = "selinux", target_os = "linux")))] + #[cfg(not(all( + feature = "selinux", + any(target_os = "linux", target_os = "android") + )))] { OutputType::Str(translate!("stat-selinux-unsupported-os")) } diff --git a/src/uu/stty/Cargo.toml b/src/uu/stty/Cargo.toml index f05a4cc5b02..94812b2acc2 100644 --- a/src/uu/stty/Cargo.toml +++ b/src/uu/stty/Cargo.toml @@ -20,9 +20,14 @@ path = "src/stty.rs" [dependencies] clap = { workspace = true } uucore = { workspace = true, features = ["parser"] } -nix = { workspace = true, features = ["term", "ioctl"] } fluent = { workspace = true } +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = ["ioctl", "term"] } + [[bin]] name = "stty" path = "src/main.rs" + +[build-dependencies] +cfg_aliases = "0.2.1" diff --git a/src/uu/stty/build.rs b/src/uu/stty/build.rs new file mode 100644 index 00000000000..0d26ea321ca --- /dev/null +++ b/src/uu/stty/build.rs @@ -0,0 +1,14 @@ +use cfg_aliases::cfg_aliases; + +fn main() { + cfg_aliases! { + bsd: { any( + target_os = "freebsd", + target_os = "dragonfly", + target_os = "ios", + target_os = "macos", + target_os = "netbsd", + target_os = "openbsd" + ) }, + } +} diff --git a/src/uu/stty/src/stty.rs b/src/uu/stty/src/stty.rs index f34f9b498e5..0c3fea02b70 100644 --- a/src/uu/stty/src/stty.rs +++ b/src/uu/stty/src/stty.rs @@ -36,14 +36,7 @@ use uucore::format_usage; use uucore::parser::num_parser::ExtendedParser; use uucore::translate; -#[cfg(not(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "ios", - target_os = "macos", - target_os = "netbsd", - target_os = "openbsd" -)))] +#[cfg(not(bsd))] use flags::BAUD_RATES; use flags::{CONTROL_CHARS, CONTROL_FLAGS, INPUT_FLAGS, LOCAL_FLAGS, OUTPUT_FLAGS}; @@ -624,26 +617,12 @@ fn print_terminal_size( let mut printer = WrappedPrinter::new(window_size); // BSDs use a u32 for the baud rate, so we can simply print it. - #[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "ios", - target_os = "macos", - target_os = "netbsd", - target_os = "openbsd" - ))] + #[cfg(bsd)] printer.print(&translate!("stty-output-speed", "speed" => speed)); // Other platforms need to use the baud rate enum, so printing the right value // becomes slightly more complicated. - #[cfg(not(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "ios", - target_os = "macos", - target_os = "netbsd", - target_os = "openbsd" - )))] + #[cfg(not(bsd))] for (text, baud_rate) in BAUD_RATES { if *baud_rate == speed { printer.print(&translate!("stty-output-speed", "speed" => (*text))); @@ -752,24 +731,10 @@ fn string_to_baud(arg: &str, baud_type: flags::BaudType) -> Option> let value = parse_baud_with_rounding(normalized)?; // BSDs use a u32 for the baud rate, so any decimal number applies. - #[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "ios", - target_os = "macos", - target_os = "netbsd", - target_os = "openbsd" - ))] + #[cfg(bsd)] return Some(AllFlags::Baud(value, baud_type)); - #[cfg(not(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "ios", - target_os = "macos", - target_os = "netbsd", - target_os = "openbsd" - )))] + #[cfg(not(bsd))] { for (text, baud_rate) in BAUD_RATES { if text.parse::().ok() == Some(value) { @@ -1440,14 +1405,7 @@ mod tests { // Tests for string_to_baud #[test] fn test_string_to_baud_valid() { - #[cfg(not(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "ios", - target_os = "macos", - target_os = "netbsd", - target_os = "openbsd" - )))] + #[cfg(not(bsd))] { assert!(string_to_baud("9600", flags::BaudType::Both).is_some()); assert!(string_to_baud("115200", flags::BaudType::Both).is_some()); @@ -1455,14 +1413,7 @@ mod tests { assert!(string_to_baud("19200", flags::BaudType::Both).is_some()); } - #[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "ios", - target_os = "macos", - target_os = "netbsd", - target_os = "openbsd" - ))] + #[cfg(bsd)] { assert!(string_to_baud("9600", flags::BaudType::Both).is_some()); assert!(string_to_baud("115200", flags::BaudType::Both).is_some()); @@ -1473,14 +1424,7 @@ mod tests { #[test] fn test_string_to_baud_invalid() { - #[cfg(not(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "ios", - target_os = "macos", - target_os = "netbsd", - target_os = "openbsd" - )))] + #[cfg(not(bsd))] { assert_eq!(string_to_baud("995", flags::BaudType::Both), None); assert_eq!(string_to_baud("invalid", flags::BaudType::Both), None); diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index e1686f459b4..ec8ae450344 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -4,8 +4,6 @@ // file that was distributed with this source code. // spell-checker:ignore (ToDO) sbytes slen dlen memmem memmap Mmap mmap SIGBUS -#[cfg(unix)] -uucore::init_startup_state_capture!(); mod error; @@ -225,11 +223,99 @@ fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()> Ok(()) } +/// Make the regex flavor compatible with `regex` crate +/// +/// Concretely: +/// - Toggle escaping of (), |, {} +/// - Escape ^ and $ when not at edges +/// - Leave expressions inside [] unchanged +fn translate_regex_flavor(regex: &str) -> String { + let mut result = String::new(); + let mut chars = regex.chars().peekable(); + let mut inside_brackets = false; + let mut prev_was_backslash = false; + let mut last_char: Option = None; + + while let Some(c) = chars.next() { + let is_escaped = prev_was_backslash; + prev_was_backslash = false; + + match c { + // Unescape escaped (), |, {} when not inside brackets + '\\' if !inside_brackets && !is_escaped => { + if let Some(&next) = chars.peek() { + if matches!(next, '(' | ')' | '|' | '{' | '}') { + result.push(next); + last_char = Some(next); + chars.next(); + continue; + } + } + + result.push('\\'); + last_char = Some('\\'); + prev_was_backslash = true; + } + // Bracket tracking + '[' => { + inside_brackets = true; + result.push(c); + last_char = Some(c); + } + ']' => { + inside_brackets = false; + result.push(c); + last_char = Some(c); + } + // Escape (), |, {} when not escaped and outside brackets + '(' | ')' | '|' | '{' | '}' if !inside_brackets && !is_escaped => { + result.push('\\'); + result.push(c); + last_char = Some(c); + } + '^' if !inside_brackets && !is_escaped => { + let is_anchor_position = result.is_empty() || matches!(last_char, Some('(' | '|')); + if !is_anchor_position { + result.push('\\'); + } + result.push(c); + last_char = Some(c); + } + '$' if !inside_brackets && !is_escaped => { + let next_is_anchor_position = match chars.peek() { + None => true, + Some(&')' | &'|') => true, + Some(&'\\') => { + // Peek two ahead to see if it's \) or \| + let chars_vec: Vec = chars.clone().take(2).collect(); + matches!(chars_vec.get(1), Some(&')' | &'|')) + } + _ => false, + }; + if !next_is_anchor_position { + result.push('\\'); + } + result.push(c); + last_char = Some(c); + } + _ => { + result.push(c); + last_char = Some(c); + } + } + } + + result +} + #[allow(clippy::cognitive_complexity)] fn tac(filenames: &[OsString], before: bool, regex: bool, separator: &str) -> UResult<()> { // Compile the regular expression pattern if it is provided. let maybe_pattern = if regex { - match regex::bytes::Regex::new(separator) { + match regex::bytes::RegexBuilder::new(&translate_regex_flavor(separator)) + .multi_line(true) + .build() + { Ok(p) => Some(p), Err(e) => return Err(TacError::InvalidRegex(e).into()), } @@ -361,3 +447,88 @@ fn try_mmap_path(path: &Path) -> Option { Some(mmap) } + +#[cfg(test)] +mod tests_hybrid_flavor { + use super::translate_regex_flavor; + + #[test] + fn test_grouping_and_alternation() { + assert_eq!(translate_regex_flavor(r"\(abc\)"), r"(abc)"); + + assert_eq!(translate_regex_flavor(r"(abc)"), r"\(abc\)"); + + assert_eq!(translate_regex_flavor(r"a\|b"), r"a|b"); + + assert_eq!(translate_regex_flavor(r"a|b"), r"a\|b"); + } + + #[test] + fn test_quantifiers() { + assert_eq!(translate_regex_flavor("a+"), "a+"); + + assert_eq!(translate_regex_flavor("a*"), "a*"); + + assert_eq!(translate_regex_flavor("a?"), "a?"); + + assert_eq!(translate_regex_flavor(r"a\+"), r"a\+"); + + assert_eq!(translate_regex_flavor(r"a\*"), r"a\*"); + + assert_eq!(translate_regex_flavor(r"a\?"), r"a\?"); + } + + #[test] + fn test_intervals() { + assert_eq!(translate_regex_flavor(r"a\{1,3\}"), r"a{1,3}"); + + assert_eq!(translate_regex_flavor(r"a{1,3}"), r"a\{1,3\}"); + } + + #[test] + fn test_anchors_context() { + assert_eq!(translate_regex_flavor(r"^abc$"), r"^abc$"); + + assert_eq!(translate_regex_flavor(r"a^b"), r"a\^b"); + assert_eq!(translate_regex_flavor(r"a$b"), r"a\$b"); + + // Anchors inside groups (reset by \(...\) regardless of position) + assert_eq!(translate_regex_flavor(r"\(^abc\)"), r"(^abc)"); + assert_eq!(translate_regex_flavor(r"z\(^abc\)"), r"z(^abc)"); + assert_eq!(translate_regex_flavor(r"\(abc$\)"), r"(abc$)"); + assert_eq!(translate_regex_flavor(r"\(abc$\)z"), r"(abc$)z"); + + // Anchors inside alternation (reset by \| regardless of position) + assert_eq!(translate_regex_flavor(r"^a\|^b"), r"^a|^b"); + assert_eq!(translate_regex_flavor(r"x\|^b"), r"x|^b"); + assert_eq!(translate_regex_flavor(r"a$\|b$"), r"a$|b$"); + } + + #[test] + fn test_character_classes() { + assert_eq!(translate_regex_flavor(r"[a-z]"), r"[a-z]"); + + assert_eq!(translate_regex_flavor(r"[.]"), r"[.]"); + assert_eq!(translate_regex_flavor(r"[+]"), r"[+]"); + + assert_eq!(translate_regex_flavor(r"[]abc]"), r"[]abc]"); + + assert_eq!(translate_regex_flavor(r"[^]abc]"), r"[^]abc]"); + } + + #[test] + fn test_complex_strings() { + assert_eq!(translate_regex_flavor(r"(\d+)[+*]"), r"\(\d+\)[+*]"); + + assert_eq!(translate_regex_flavor(r"\(\d+\)\{2\}"), r"(\d+){2}"); + } + + #[test] + fn test_edge_cases() { + assert_eq!(translate_regex_flavor(r"abc\"), r"abc\"); + + assert_eq!(translate_regex_flavor(r"\\"), r"\\"); + + assert_eq!(translate_regex_flavor(r"\^"), r"\^"); + } +} diff --git a/src/uu/tail/Cargo.toml b/src/uu/tail/Cargo.toml index 055b624001e..f01b4f603f9 100644 --- a/src/uu/tail/Cargo.toml +++ b/src/uu/tail/Cargo.toml @@ -35,7 +35,6 @@ windows-sys = { workspace = true, features = [ "Win32_System_Threading", "Win32_Foundation", ] } -winapi-util = { workspace = true } [dev-dependencies] rstest = { workspace = true } diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index c1cfb333adf..7b82e956615 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -38,18 +38,8 @@ use uucore::translate; use uucore::{show, show_error}; -#[cfg(unix)] -uucore::init_startup_state_capture!(); - #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - // When we receive a SIGPIPE signal, we want to terminate the process so - // that we don't print any error messages to stderr. Rust ignores SIGPIPE - // (see https://github.com/rust-lang/rust/issues/62569), so we restore it's - // default action here. - #[cfg(not(target_os = "windows"))] - let _ = uucore::signals::enable_pipe_errors(); - let settings = parse_args(args)?; settings.check_warnings(); diff --git a/src/uu/tee/src/tee.rs b/src/uu/tee/src/tee.rs index cf3d89c0a98..026f7fd9515 100644 --- a/src/uu/tee/src/tee.rs +++ b/src/uu/tee/src/tee.rs @@ -19,7 +19,7 @@ use uucore::{format_usage, show_error}; #[cfg(target_os = "linux")] use uucore::signals::ensure_stdout_not_broken; #[cfg(unix)] -use uucore::signals::{enable_pipe_errors, ignore_interrupts}; +use uucore::signals::{disable_pipe_errors, ignore_interrupts}; mod options { pub const APPEND: &str = "append"; @@ -163,8 +163,8 @@ fn tee(options: &Options) -> Result<()> { if options.ignore_interrupts { ignore_interrupts().map_err(|_| Error::from(ErrorKind::Other))?; } - if options.output_error.is_none() { - enable_pipe_errors().map_err(|_| Error::from(ErrorKind::Other))?; + if options.output_error.is_some() { + disable_pipe_errors().map_err(|_| Error::from(ErrorKind::Other))?; } } let mut writers: Vec = options diff --git a/src/uu/test/src/test.rs b/src/uu/test/src/test.rs index 0e4e809d760..48d691a39dc 100644 --- a/src/uu/test/src/test.rs +++ b/src/uu/test/src/test.rs @@ -183,11 +183,13 @@ fn integers(a: &OsStr, b: &OsStr, op: &OsStr) -> ParseResult { // Parse the two inputs let a: i128 = a .to_str() + .map(|s| s.trim()) .and_then(|s| s.parse().ok()) .ok_or_else(|| ParseError::InvalidInteger(a.quote().to_string()))?; let b: i128 = b .to_str() + .map(|s| s.trim()) .and_then(|s| s.parse().ok()) .ok_or_else(|| ParseError::InvalidInteger(b.quote().to_string()))?; @@ -229,6 +231,7 @@ fn files(a: &OsStr, b: &OsStr, op: &OsStr) -> ParseResult { fn isatty(fd: &OsStr) -> ParseResult { fd.to_str() + .map(|s| s.trim()) .and_then(|s| s.parse().ok()) .ok_or_else(|| ParseError::InvalidInteger(fd.quote().to_string())) .map(|i| unsafe { libc::isatty(i) == 1 }) diff --git a/src/uu/timeout/Cargo.toml b/src/uu/timeout/Cargo.toml index c6b795628f7..e0f3db17109 100644 --- a/src/uu/timeout/Cargo.toml +++ b/src/uu/timeout/Cargo.toml @@ -20,10 +20,12 @@ path = "src/timeout.rs" [dependencies] clap = { workspace = true } libc = { workspace = true } -nix = { workspace = true, features = ["signal"] } uucore = { workspace = true, features = ["parser", "process", "signals"] } fluent = { workspace = true } +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = ["signal"] } + [[bin]] name = "timeout" path = "src/main.rs" diff --git a/src/uu/timeout/src/status.rs b/src/uu/timeout/src/status.rs index 1134fb88d8c..70fa2c09726 100644 --- a/src/uu/timeout/src/status.rs +++ b/src/uu/timeout/src/status.rs @@ -33,9 +33,6 @@ pub(crate) enum ExitStatus { /// When a signal is sent to the child process or `timeout` itself. SignalSent(usize), - - /// When `SIGTERM` signal received. - Terminated, } impl From for i32 { @@ -46,7 +43,6 @@ impl From for i32 { ExitStatus::CannotInvoke => 126, ExitStatus::CommandNotFound => 127, ExitStatus::SignalSent(s) => 128 + s as Self, - ExitStatus::Terminated => 143, } } } diff --git a/src/uu/timeout/src/timeout.rs b/src/uu/timeout/src/timeout.rs index 22c839c42a1..cac487036ab 100644 --- a/src/uu/timeout/src/timeout.rs +++ b/src/uu/timeout/src/timeout.rs @@ -4,15 +4,13 @@ // file that was distributed with this source code. // spell-checker:ignore (ToDO) tstr sigstr cmdname setpgid sigchld getpid -#[cfg(unix)] -uucore::init_startup_state_capture!(); mod status; use crate::status::ExitStatus; use clap::{Arg, ArgAction, Command}; -use std::io::ErrorKind; -use std::os::unix::process::{CommandExt, ExitStatusExt}; +use std::io::{ErrorKind, Write}; +use std::os::unix::process::ExitStatusExt; use std::process::{self, Child, Stdio}; use std::sync::atomic::{self, AtomicBool}; use std::time::Duration; @@ -22,16 +20,15 @@ use uucore::parser::parse_time; use uucore::process::ChildExt; use uucore::translate; -#[cfg(unix)] -use uucore::signals::enable_pipe_errors; - use uucore::{ - format_usage, show_error, + format_usage, signals::{signal_by_name_or_value, signal_name_by_value}, }; -use nix::sys::signal::{Signal, kill}; +use nix::sys::signal::{SigHandler, Signal, kill}; use nix::unistd::{Pid, getpid, setpgid}; +#[cfg(unix)] +use std::os::unix::process::CommandExt; pub mod options { pub static FOREGROUND: &str = "foreground"; @@ -182,32 +179,46 @@ pub fn uu_app() -> Command { .after_help(translate!("timeout-after-help")) } -/// Remove pre-existing SIGCHLD handlers that would make waiting for the child's exit code fail. -fn unblock_sigchld() { - unsafe { - nix::sys::signal::signal( - nix::sys::signal::Signal::SIGCHLD, - nix::sys::signal::SigHandler::SigDfl, - ) - .unwrap(); - } +/// Install SIGCHLD handler to ensure waiting for child works even if parent ignored SIGCHLD. +fn install_sigchld() { + extern "C" fn chld(_: libc::c_int) {} + let _ = unsafe { nix::sys::signal::signal(Signal::SIGCHLD, SigHandler::Handler(chld)) }; } -/// We should terminate child process when receiving TERM signal. +/// We should terminate child process when receiving termination signals. static SIGNALED: AtomicBool = AtomicBool::new(false); +/// Track which signal was received (0 = none/timeout expired naturally). +static RECEIVED_SIGNAL: std::sync::atomic::AtomicI32 = std::sync::atomic::AtomicI32::new(0); + +/// Install signal handlers for termination signals. +fn install_signal_handlers(term_signal: usize) { + extern "C" fn handle_signal(sig: libc::c_int) { + SIGNALED.store(true, atomic::Ordering::Relaxed); + RECEIVED_SIGNAL.store(sig, atomic::Ordering::Relaxed); + } -fn catch_sigterm() { - use nix::sys::signal; - - extern "C" fn handle_sigterm(signal: libc::c_int) { - let signal = signal::Signal::try_from(signal).unwrap(); - if signal == signal::Signal::SIGTERM { - SIGNALED.store(true, atomic::Ordering::Relaxed); + let handler = SigHandler::Handler(handle_signal); + let sigpipe_ignored = uucore::signals::sigpipe_was_ignored(); + + for sig in [ + Signal::SIGALRM, + Signal::SIGINT, + Signal::SIGQUIT, + Signal::SIGHUP, + Signal::SIGTERM, + Signal::SIGPIPE, + Signal::SIGUSR1, + Signal::SIGUSR2, + ] { + if sig == Signal::SIGPIPE && sigpipe_ignored { + continue; // Skip SIGPIPE if it was ignored by parent } + let _ = unsafe { nix::sys::signal::signal(sig, handler) }; } - let handler = signal::SigHandler::Handler(handle_sigterm); - unsafe { signal::signal(signal::Signal::SIGTERM, handler) }.unwrap(); + if let Ok(sig) = Signal::try_from(term_signal as i32) { + let _ = unsafe { nix::sys::signal::signal(sig, handler) }; + } } /// Report that a signal is being sent if the verbose flag is set. @@ -218,26 +229,29 @@ fn report_if_verbose(signal: usize, cmd: &str, verbose: bool) { } else { signal_name_by_value(signal).unwrap().to_string() }; - show_error!( - "{}", + let mut stderr = std::io::stderr(); + let _ = writeln!( + stderr, + "timeout: {}", translate!("timeout-verbose-sending-signal", "signal" => s, "command" => cmd.quote()) ); + let _ = stderr.flush(); } } fn send_signal(process: &mut Child, signal: usize, foreground: bool) { // NOTE: GNU timeout doesn't check for errors of signal. // The subprocess might have exited just after the timeout. - // Sending a signal now would return "No such process", but we should still try to kill the children. - if foreground { - let _ = process.send_signal(signal); - } else { - let _ = process.send_signal_group(signal); - let kill_signal = signal_by_name_or_value("KILL").unwrap(); - let continued_signal = signal_by_name_or_value("CONT").unwrap(); - if signal != kill_signal && signal != continued_signal { - _ = process.send_signal_group(continued_signal); - } + let _ = process.send_signal(signal); + if signal == 0 || foreground { + return; + } + let _ = process.send_signal_group(signal); + let kill_signal = signal_by_name_or_value("KILL").unwrap(); + let continued_signal = signal_by_name_or_value("CONT").unwrap(); + if signal != kill_signal && signal != continued_signal { + let _ = process.send_signal(continued_signal); + let _ = process.send_signal_group(continued_signal); } } @@ -334,27 +348,47 @@ fn timeout( if !foreground { let _ = setpgid(Pid::from_raw(0), Pid::from_raw(0)); } - #[cfg(unix)] - enable_pipe_errors()?; - let mut command = process::Command::new(&cmd[0]); - command + let mut cmd_builder = process::Command::new(&cmd[0]); + cmd_builder .args(&cmd[1..]) .stdin(Stdio::inherit()) .stdout(Stdio::inherit()) .stderr(Stdio::inherit()); - // If stdin was closed before Rust reopened it as /dev/null, close it in child - if uucore::signals::stdin_was_closed() { + #[cfg(unix)] + { + #[cfg(target_os = "linux")] + let death_sig = Signal::try_from(signal as i32).ok(); + let sigpipe_was_ignored = uucore::signals::sigpipe_was_ignored(); + let stdin_was_closed = uucore::signals::stdin_was_closed(); + unsafe { - command.pre_exec(|| { - libc::close(libc::STDIN_FILENO); + cmd_builder.pre_exec(move || { + // Reset terminal signals to default + let _ = nix::sys::signal::signal(Signal::SIGTTIN, SigHandler::SigDfl); + let _ = nix::sys::signal::signal(Signal::SIGTTOU, SigHandler::SigDfl); + // Preserve SIGPIPE ignore status if parent had it ignored + if sigpipe_was_ignored { + let _ = nix::sys::signal::signal(Signal::SIGPIPE, SigHandler::SigIgn); + } + // If stdin was closed before Rust reopened it as /dev/null, close it in child + if stdin_was_closed { + libc::close(libc::STDIN_FILENO); + } + #[cfg(target_os = "linux")] + if let Some(sig) = death_sig { + let _ = nix::sys::prctl::set_pdeathsig(sig); + } Ok(()) }); } } - let process = &mut command.spawn().map_err(|err| { + install_sigchld(); + install_signal_handlers(signal); + + let process = &mut cmd_builder.spawn().map_err(|err| { let status_code = match err.kind() { ErrorKind::NotFound => ExitStatus::CommandNotFound.into(), ErrorKind::PermissionDenied => ExitStatus::CannotInvoke.into(), @@ -365,8 +399,7 @@ fn timeout( translate!("timeout-error-failed-to-execute-process", "error" => err), ) })?; - unblock_sigchld(); - catch_sigterm(); + // Wait for the child process for the specified time period. // // If the process exits within the specified time period (the @@ -388,41 +421,51 @@ fn timeout( Err(exit_code.into()) } Ok(None) => { - report_if_verbose(signal, &cmd[0], verbose); - send_signal(process, signal, foreground); - match kill_after { - None => { - let status = process.wait()?; - if SIGNALED.load(atomic::Ordering::Relaxed) { - Err(ExitStatus::Terminated.into()) - } else if preserve_status { - if let Some(ec) = status.code() { - Err(ec.into()) - } else if let Some(sc) = status.signal() { - Err(ExitStatus::SignalSent(sc.try_into().unwrap()).into()) - } else { - Err(ExitStatus::CommandTimedOut.into()) - } - } else { - Err(ExitStatus::CommandTimedOut.into()) - } - } - Some(kill_after) => { - match wait_or_kill_process( - process, - &cmd[0], - kill_after, - preserve_status, - foreground, - verbose, - ) { - Ok(status) => Err(status.into()), - Err(e) => Err(USimpleError::new( - ExitStatus::TimeoutFailed.into(), - e.to_string(), - )), - } - } + let received_sig = RECEIVED_SIGNAL.load(atomic::Ordering::Relaxed); + let is_external_signal = received_sig > 0 && received_sig != libc::SIGALRM; + let signal_to_send = if is_external_signal { + received_sig as usize + } else { + signal + }; + + report_if_verbose(signal_to_send, &cmd[0], verbose); + send_signal(process, signal_to_send, foreground); + + if let Some(kill_after) = kill_after { + return match wait_or_kill_process( + process, + &cmd[0], + kill_after, + preserve_status, + foreground, + verbose, + ) { + Ok(status) => Err(status.into()), + Err(e) => Err(USimpleError::new( + ExitStatus::TimeoutFailed.into(), + e.to_string(), + )), + }; + } + + let status = process.wait()?; + if is_external_signal { + Err(ExitStatus::SignalSent(received_sig as usize).into()) + } else if SIGNALED.load(atomic::Ordering::Relaxed) { + Err(ExitStatus::CommandTimedOut.into()) + } else if preserve_status { + Err(status + .code() + .or_else(|| { + status + .signal() + .map(|s| ExitStatus::SignalSent(s as usize).into()) + }) + .unwrap_or(ExitStatus::CommandTimedOut.into()) + .into()) + } else { + Err(ExitStatus::CommandTimedOut.into()) } } Err(_) => { diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 2b20d29ce86..3a0ee625371 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -31,13 +31,6 @@ mod options { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - // When we receive a SIGPIPE signal, we want to terminate the process so - // that we don't print any error messages to stderr. Rust ignores SIGPIPE - // (see https://github.com/rust-lang/rust/issues/62569), so we restore it's - // default action here. - #[cfg(not(target_os = "windows"))] - let _ = uucore::signals::enable_pipe_errors(); - let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?; let delete_flag = matches.get_flag(options::DELETE); diff --git a/src/uu/tsort/Cargo.toml b/src/uu/tsort/Cargo.toml index 72559199c8c..8819596e0ab 100644 --- a/src/uu/tsort/Cargo.toml +++ b/src/uu/tsort/Cargo.toml @@ -23,16 +23,17 @@ clap = { workspace = true } fluent = { workspace = true } string-interner = { workspace = true } thiserror = { workspace = true } -nix = { workspace = true, features = ["fs"] } uucore = { workspace = true } +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = ["fs"] } + [[bin]] name = "tsort" path = "src/main.rs" [dev-dependencies] divan = { workspace = true } -tempfile = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } [[bench]] diff --git a/src/uu/tty/Cargo.toml b/src/uu/tty/Cargo.toml index 407e8b0d14b..77165c60581 100644 --- a/src/uu/tty/Cargo.toml +++ b/src/uu/tty/Cargo.toml @@ -19,10 +19,12 @@ path = "src/tty.rs" [dependencies] clap = { workspace = true } -nix = { workspace = true, features = ["term"] } uucore = { workspace = true, features = ["fs"] } fluent = { workspace = true } +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = ["term"] } + [[bin]] name = "tty" path = "src/main.rs" diff --git a/src/uu/tty/src/tty.rs b/src/uu/tty/src/tty.rs index 1469948b888..5bf5199a073 100644 --- a/src/uu/tty/src/tty.rs +++ b/src/uu/tty/src/tty.rs @@ -19,6 +19,11 @@ mod options { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { + // Disable SIGPIPE so we can handle broken pipe errors gracefully + // and exit with code 3 instead of being killed by the signal. + #[cfg(unix)] + let _ = uucore::signals::disable_pipe_errors(); + let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 2)?; let silent = matches.get_flag(options::SILENT); diff --git a/src/uu/uname/src/uname.rs b/src/uu/uname/src/uname.rs index 383d5c581d0..c35e9d51a99 100644 --- a/src/uu/uname/src/uname.rs +++ b/src/uu/uname/src/uname.rs @@ -135,7 +135,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { os: matches.get_flag(options::OS), }; let output = UNameOutput::new(&options)?; - println_verbatim(output.display().as_os_str()).unwrap(); + println_verbatim(output.display().as_os_str()) + .map_err(|e| USimpleError::new(1, e.to_string()))?; Ok(()) } diff --git a/src/uu/unexpand/Cargo.toml b/src/uu/unexpand/Cargo.toml index 19128ad0354..d7ea1533b92 100644 --- a/src/uu/unexpand/Cargo.toml +++ b/src/uu/unexpand/Cargo.toml @@ -20,7 +20,6 @@ path = "src/unexpand.rs" [dependencies] thiserror = { workspace = true } clap = { workspace = true } -unicode-width = { workspace = true } uucore = { workspace = true } fluent = { workspace = true } diff --git a/src/uu/unexpand/src/unexpand.rs b/src/uu/unexpand/src/unexpand.rs index 1840f659c38..14c2b8b0b0a 100644 --- a/src/uu/unexpand/src/unexpand.rs +++ b/src/uu/unexpand/src/unexpand.rs @@ -14,7 +14,7 @@ use std::path::Path; use std::str::from_utf8; use thiserror::Error; use uucore::display::Quotable; -use uucore::error::{FromIo, UError, UResult, USimpleError}; +use uucore::error::{FromIo, UError, UResult, USimpleError, set_exit_code}; use uucore::translate; use uucore::{format_usage, show}; @@ -566,10 +566,28 @@ fn unexpand_line( Ok(()) } +fn unexpand_file( + file: &OsString, + output: &mut BufWriter, + options: &Options, + lastcol: usize, + tab_config: &TabConfig, +) -> UResult<()> { + let mut buf = Vec::new(); + let mut input = open(file)?; + loop { + match input.read_until(b'\n', &mut buf) { + Ok(0) => break, + Ok(_) => unexpand_line(&mut buf, output, options, lastcol, tab_config)?, + Err(e) => return Err(e.map_err_context(|| file.maybe_quote().to_string())), + } + } + Ok(()) +} + fn unexpand(options: &Options) -> UResult<()> { let mut output = BufWriter::new(stdout()); let tab_config = &options.tab_config; - let mut buf = Vec::new(); let lastcol = if tab_config.tabstops.len() > 1 && tab_config.increment_size.is_none() && tab_config.extend_size.is_none() @@ -580,19 +598,9 @@ fn unexpand(options: &Options) -> UResult<()> { }; for file in &options.files { - let mut fh = match open(file) { - Ok(reader) => reader, - Err(err) => { - show!(err); - continue; - } - }; - - while match fh.read_until(b'\n', &mut buf) { - Ok(s) => s > 0, - Err(_) => !buf.is_empty(), - } { - unexpand_line(&mut buf, &mut output, options, lastcol, tab_config)?; + if let Err(e) = unexpand_file(file, &mut output, options, lastcol, tab_config) { + show!(e); + set_exit_code(1); } } output.flush()?; diff --git a/src/uu/uniq/Cargo.toml b/src/uu/uniq/Cargo.toml index 59a463071ae..0bd19782717 100644 --- a/src/uu/uniq/Cargo.toml +++ b/src/uu/uniq/Cargo.toml @@ -24,7 +24,6 @@ fluent = { workspace = true } [dev-dependencies] divan = { workspace = true } -tempfile = { workspace = true } uucore = { workspace = true, features = ["benchmark", "parser"] } [[bin]] diff --git a/src/uu/uptime/Cargo.toml b/src/uu/uptime/Cargo.toml index 651b342cd96..0260390266f 100644 --- a/src/uu/uptime/Cargo.toml +++ b/src/uu/uptime/Cargo.toml @@ -30,7 +30,6 @@ fluent = { workspace = true } jiff = { workspace = true } [target.'cfg(target_os = "openbsd")'.dependencies] -utmp-classic = { workspace = true } [[bin]] name = "uptime" diff --git a/src/uu/yes/Cargo.toml b/src/uu/yes/Cargo.toml index 3b6e8d08fc1..33623c7c9f1 100644 --- a/src/uu/yes/Cargo.toml +++ b/src/uu/yes/Cargo.toml @@ -24,7 +24,6 @@ fluent = { workspace = true } [target.'cfg(unix)'.dependencies] uucore = { workspace = true, features = ["pipes", "signals"] } -nix = { workspace = true } [target.'cfg(not(unix))'.dependencies] uucore = { workspace = true, features = ["pipes"] } diff --git a/src/uu/yes/src/yes.rs b/src/uu/yes/src/yes.rs index a5aaa18a867..92527221b91 100644 --- a/src/uu/yes/src/yes.rs +++ b/src/uu/yes/src/yes.rs @@ -11,8 +11,6 @@ use std::ffi::OsString; use std::io::{self, Write}; use uucore::error::{UResult, USimpleError}; use uucore::format_usage; -#[cfg(unix)] -use uucore::signals::enable_pipe_errors; use uucore::translate; // it's possible that using a smaller or larger buffer might provide better performance on some @@ -29,6 +27,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { match exec(&buffer) { Ok(()) => Ok(()), + // On Windows, silently handle broken pipe since there's no SIGPIPE + #[cfg(windows)] Err(err) if err.kind() == io::ErrorKind::BrokenPipe => Ok(()), Err(err) => Err(USimpleError::new( 1, @@ -113,8 +113,6 @@ fn prepare_buffer(buf: &mut Vec) { pub fn exec(bytes: &[u8]) -> io::Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); - #[cfg(unix)] - enable_pipe_errors()?; loop { stdout.write_all(bytes)?; diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 1cbc276e4a7..507f7740c6e 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -26,7 +26,6 @@ bstr = { workspace = true, optional = true } clap = { workspace = true } uucore_procs = { workspace = true } unit-prefix = { workspace = true, optional = true } -phf = { workspace = true } dns-lookup = { workspace = true, optional = true } dunce = { version = "1.0.4", optional = true } glob = { workspace = true, optional = true } @@ -69,9 +68,15 @@ num-traits = { workspace = true, optional = true } selinux = { workspace = true, optional = true } # icu stuff +icu_calendar = { workspace = true, optional = true, features = [ + "compiled_data", +] } icu_collator = { workspace = true, optional = true, features = [ "compiled_data", ] } +icu_datetime = { workspace = true, optional = true, features = [ + "compiled_data", +] } icu_decimal = { workspace = true, optional = true, features = [ "compiled_data", ] } @@ -84,19 +89,19 @@ fluent-syntax = { workspace = true } unic-langid = { workspace = true } fluent-bundle = { workspace = true } thiserror = { workspace = true } + [target.'cfg(unix)'.dependencies] -walkdir = { workspace = true, optional = true } nix = { workspace = true, features = [ + "dir", "fs", - "uio", - "zerocopy", + "poll", "signal", - "dir", + "uio", "user", - "poll", + "zerocopy", ] } +walkdir = { workspace = true, optional = true } xattr = { workspace = true, optional = true } -itertools = { workspace = true, optional = true } [dev-dependencies] tempfile = { workspace = true } @@ -120,7 +125,7 @@ windows-sys = { workspace = true, optional = true, default-features = false, fea utmp-classic = { workspace = true, optional = true } [features] -default = [] +default = ["signals"] # * non-default features backup-control = [] colors = [] @@ -144,10 +149,11 @@ format = [ "quoting-style", "unit-prefix", ] -i18n-all = ["i18n-collator", "i18n-decimal"] +i18n-all = ["i18n-collator", "i18n-decimal", "i18n-datetime"] i18n-common = ["icu_locale"] i18n-collator = ["i18n-common", "icu_collator"] i18n-decimal = ["i18n-common", "icu_decimal", "icu_provider"] +i18n-datetime = ["i18n-common", "icu_calendar", "icu_datetime"] mode = ["libc"] perms = ["entries", "libc", "walkdir"] buf-copy = [] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index cd2ce405ffe..03d4101607b 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -81,7 +81,7 @@ pub mod tty; pub mod fsxattr; #[cfg(feature = "hardware")] pub mod hardware; -#[cfg(all(target_os = "linux", feature = "selinux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] pub mod selinux; #[cfg(all(unix, not(target_os = "fuchsia"), feature = "signals"))] pub mod signals; diff --git a/src/uucore/src/lib/features/checksum/compute.rs b/src/uucore/src/lib/features/checksum/compute.rs index c08765af40e..c5b0cf6e4b6 100644 --- a/src/uucore/src/lib/features/checksum/compute.rs +++ b/src/uucore/src/lib/features/checksum/compute.rs @@ -5,12 +5,12 @@ // spell-checker:ignore bitlen -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::fs::File; use std::io::{self, BufReader, Read, Write}; use std::path::Path; -use crate::checksum::{ChecksumError, SizedAlgoKind, digest_reader, escape_filename}; +use crate::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, digest_reader, escape_filename}; use crate::error::{FromIo, UResult, USimpleError}; use crate::line_ending::LineEnding; use crate::sum::DigestOutput; @@ -103,42 +103,76 @@ impl OutputFormat { fn is_raw(&self) -> bool { *self == Self::Raw } -} -/// Use already-processed arguments to decide the output format. -pub fn figure_out_output_format( - algo: SizedAlgoKind, - tag: bool, - binary: bool, - raw: bool, - base64: bool, -) -> OutputFormat { - // Raw output format takes precedence over anything else. - if raw { - return OutputFormat::Raw; - } + /// Find the correct output format for cksum. + pub fn from_cksum(algo: AlgoKind, tag: bool, binary: bool, raw: bool, base64: bool) -> Self { + // Raw output format takes precedence over anything else. + if raw { + return Self::Raw; + } + + // Then, if the algo is legacy, takes precedence over the rest + if algo.is_legacy() { + return Self::Legacy; + } - // Then, if the algo is legacy, takes precedence over the rest - if algo.is_legacy() { - return OutputFormat::Legacy; + let digest_format = if base64 { + DigestFormat::Base64 + } else { + DigestFormat::Hexadecimal + }; + + // After that, decide between tagged and untagged output + if tag { + Self::Tagged(digest_format) + } else { + let reading_mode = if binary { + ReadingMode::Binary + } else { + ReadingMode::Text + }; + Self::Untagged(digest_format, reading_mode) + } } - let digest_format = if base64 { - DigestFormat::Base64 - } else { - DigestFormat::Hexadecimal - }; + /// Find the correct output format for a standalone checksum util (b2sum, + /// md5sum, etc) + /// + /// Since standalone utils can't use the Raw or Legacy output format, it is + /// decided only using the --tag, --binary and --text arguments. + pub fn from_standalone(args: impl Iterator) -> UResult { + let mut text = true; + let mut tag = false; + + for arg in args { + if arg == "--" { + break; + } else if arg == "--tag" { + tag = true; + text = false; + } else if arg == "--binary" || arg == "-b" { + text = false; + } else if arg == "--text" || arg == "-t" { + // Finding a `--text` after `--tag` is an error. + if tag { + return Err(ChecksumError::TextAfterTag.into()); + } + text = true; + } + } - // After that, decide between tagged and untagged output - if tag { - OutputFormat::Tagged(digest_format) - } else { - let reading_mode = if binary { - ReadingMode::Binary + if tag { + Ok(Self::Tagged(DigestFormat::Hexadecimal)) } else { - ReadingMode::Text - }; - OutputFormat::Untagged(digest_format, reading_mode) + Ok(Self::Untagged( + DigestFormat::Hexadecimal, + if text { + ReadingMode::Text + } else { + ReadingMode::Binary + }, + )) + } } } diff --git a/src/uucore/src/lib/features/checksum/mod.rs b/src/uucore/src/lib/features/checksum/mod.rs index 2f3d28b4121..7ae4c775be6 100644 --- a/src/uucore/src/lib/features/checksum/mod.rs +++ b/src/uucore/src/lib/features/checksum/mod.rs @@ -397,6 +397,8 @@ pub enum ChecksumError { BinaryTextConflict, #[error("--text mode is only supported with --untagged")] TextWithoutUntagged, + #[error("--tag does not support --text mode")] + TextAfterTag, #[error("--check is not supported with --algorithm={{bsd,sysv,crc,crc32b}}")] AlgorithmNotSupportedWithCheck, #[error("You cannot combine multiple hash algorithms!")] diff --git a/src/uucore/src/lib/features/i18n/datetime.rs b/src/uucore/src/lib/features/i18n/datetime.rs new file mode 100644 index 00000000000..e5d6a666286 --- /dev/null +++ b/src/uucore/src/lib/features/i18n/datetime.rs @@ -0,0 +1,264 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Locale-aware datetime formatting utilities using ICU +// spell-checker:ignore fieldsets janvier + +use icu_calendar::Date; +use icu_datetime::DateTimeFormatter; +use icu_datetime::fieldsets; +use icu_locale::Locale; +use std::sync::OnceLock; + +use crate::i18n::get_locale_from_env; + +/// Get the locale for time/date formatting from LC_TIME environment variable +pub fn get_time_locale() -> &'static (Locale, super::UEncoding) { + static TIME_LOCALE: OnceLock<(Locale, super::UEncoding)> = OnceLock::new(); + + TIME_LOCALE.get_or_init(|| get_locale_from_env("LC_TIME")) +} + +/// Check if we should use ICU for locale-aware time/date formatting +/// +/// Returns true for non-C/POSIX locales, false otherwise +pub fn should_use_icu_locale() -> bool { + use icu_locale::locale; + + let (locale, _encoding) = get_time_locale(); + + // Use ICU for non-default locales (anything other than C/POSIX) + // The default locale is "und" (undefined) representing C/POSIX + *locale != locale!("und") +} + +/// Get a localized month name for the given month number (1-12) +/// +/// # Arguments +/// * `month` - Month number (1 = January, 2 = February, etc.) +/// * `full` - If true, return full month name (e.g., "January"), otherwise abbreviated (e.g., "Jan") +/// +/// # Returns +/// Localized month name, or falls back to English if locale is not supported +pub fn get_localized_month_name(month: u8, full: bool) -> String { + // Get locale from environment + let (locale, _encoding) = get_time_locale(); + + // Create a date with the specified month (use year 2000, day 1 as arbitrary values) + let Ok(date) = Date::try_new_gregorian(2000, month, 1) else { + // Invalid month, return empty string to signal failure + return String::new(); + }; + + // Configure field set for month formatting + // Use Year-Month-Day format to ensure we get textual month names + let field_set = if full { + fieldsets::YMD::long() + } else { + fieldsets::YMD::medium() + }; + + // Create formatter with locale + let Ok(formatter) = DateTimeFormatter::try_new(locale.clone().into(), field_set) else { + // Failed to create formatter, return empty string to signal failure + return String::new(); + }; + + // Format the date to get full date, then extract month + let formatted = formatter.format(&date).to_string(); + // Extract month name from formatted date like "15 janvier 2000" or "2000-01-15" + // Look for a word that contains letters (the month name) + let words: Vec<&str> = formatted.split_whitespace().collect(); + + // Return the month name as extracted from ICU (no further processing needed) + // ICU already handles the full vs abbreviated formatting correctly + words + .iter() + .find(|word| word.chars().any(|c| c.is_alphabetic())) + .map_or_else(String::new, |s| (*s).to_string()) +} + +/// Get a localized day name for the given date components +/// +/// # Arguments +/// * `year` - The year +/// * `month` - The month (1-12) +/// * `day` - The day of the month +/// * `full` - If true, return full day name (e.g., "Monday"), otherwise abbreviated (e.g., "Mon") +/// +/// # Returns +/// Localized day name, or falls back to empty string if locale is not supported +pub fn get_localized_day_name(year: i32, month: u8, day: u8, full: bool) -> String { + // Create ICU Date from components + let Ok(date) = Date::try_new_gregorian(year, month, day) else { + return String::new(); + }; + + // Get locale from environment + let (locale, _encoding) = get_time_locale(); + + // Configure field set for day formatting + let field_set = if full { + fieldsets::E::long() // Full day name + } else { + fieldsets::E::short() // Abbreviated day name + }; + + // Create formatter with locale + let Ok(formatter) = DateTimeFormatter::try_new(locale.clone().into(), field_set) else { + return String::new(); + }; + + // Format the date to get day name + let formatted = formatter.format(&date).to_string(); + formatted.trim().to_string() +} + +/// Determine the appropriate calendar system for a given locale +pub fn get_locale_calendar_type(locale: &Locale) -> CalendarType { + let locale_str = locale.to_string(); + + match locale_str.as_str() { + // Thai locales use Buddhist calendar + s if s.starts_with("th") => CalendarType::Buddhist, + // Persian/Farsi locales use Persian calendar (Solar Hijri) + s if s.starts_with("fa") => CalendarType::Persian, + // Amharic (Ethiopian) locales use Ethiopian calendar + s if s.starts_with("am") => CalendarType::Ethiopian, + // Default to Gregorian for all other locales + _ => CalendarType::Gregorian, + } +} + +/// Calendar types supported for locale-aware formatting +#[derive(Debug, Clone, PartialEq)] +pub enum CalendarType { + /// Gregorian calendar (used by most locales) + Gregorian, + /// Buddhist calendar (Thai locales) - adds 543 years to Gregorian year + Buddhist, + /// Persian Solar Hijri calendar (Persian/Farsi locales) - subtracts 621/622 years + Persian, + /// Ethiopian calendar (Amharic locales) - subtracts 7/8 years + Ethiopian, +} + +/// Convert a Gregorian date to the appropriate calendar system for a locale +/// +/// # Arguments +/// * `year` - Gregorian year +/// * `month` - Month (1-12) +/// * `day` - Day (1-31) +/// * `calendar_type` - Target calendar system +/// +/// # Returns +/// * `Some((era_year, month, day))` - Date in target calendar system +/// * `None` - If conversion fails +pub fn convert_date_to_locale_calendar( + year: i32, + month: u8, + day: u8, + calendar_type: &CalendarType, +) -> Option<(i32, u8, u8)> { + match calendar_type { + CalendarType::Gregorian => Some((year, month, day)), + CalendarType::Buddhist => { + // Buddhist calendar: Gregorian year + 543 + Some((year + 543, month, day)) + } + CalendarType::Persian => { + // Persian calendar conversion (Solar Hijri) + // March 21 (Nowruz) is roughly the start of the Persian year + let persian_year = if month > 3 || (month == 3 && day >= 21) { + year - 621 // After March 21 + } else { + year - 622 // Before March 21 + }; + Some((persian_year, month, day)) + } + CalendarType::Ethiopian => { + // Ethiopian calendar conversion + // September 11/12 is roughly the start of the Ethiopian year + let ethiopian_year = if month > 9 || (month == 9 && day >= 11) { + year - 7 // After September 11 + } else { + year - 8 // Before September 11 + }; + Some((ethiopian_year, month, day)) + } + } +} + +/// Get the era year for a given date and locale +pub fn get_era_year(year: i32, month: u8, day: u8, locale: &Locale) -> Option { + // Validate input date + if !(1..=12).contains(&month) || !(1..=31).contains(&day) { + return None; + } + + let calendar_type = get_locale_calendar_type(locale); + match calendar_type { + CalendarType::Gregorian => None, + _ => convert_date_to_locale_calendar(year, month, day, &calendar_type) + .map(|(era_year, _, _)| era_year), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_localized_month_name_fallback() { + // This should work even if locale is not available + let name = get_localized_month_name(1, true); + // The function may return empty string if ICU fails, which is fine + // The caller (date.rs) will handle this by falling back to jiff + assert!(name.is_empty() || name.len() >= 3); + } + + #[test] + fn test_calendar_type_detection() { + let thai_locale = icu_locale::locale!("th-TH"); + let persian_locale = icu_locale::locale!("fa-IR"); + let amharic_locale = icu_locale::locale!("am-ET"); + let english_locale = icu_locale::locale!("en-US"); + + assert_eq!( + get_locale_calendar_type(&thai_locale), + CalendarType::Buddhist + ); + assert_eq!( + get_locale_calendar_type(&persian_locale), + CalendarType::Persian + ); + assert_eq!( + get_locale_calendar_type(&amharic_locale), + CalendarType::Ethiopian + ); + assert_eq!( + get_locale_calendar_type(&english_locale), + CalendarType::Gregorian + ); + } + + #[test] + fn test_era_year_conversion() { + let thai_locale = icu_locale::locale!("th-TH"); + let persian_locale = icu_locale::locale!("fa-IR"); + let amharic_locale = icu_locale::locale!("am-ET"); + + // Test Thai Buddhist calendar (2026 + 543 = 2569) + assert_eq!(get_era_year(2026, 6, 15, &thai_locale), Some(2569)); + + // Test Persian calendar (rough approximation) + assert_eq!(get_era_year(2026, 3, 22, &persian_locale), Some(1405)); + assert_eq!(get_era_year(2026, 3, 19, &persian_locale), Some(1404)); + + // Test Ethiopian calendar (rough approximation) + assert_eq!(get_era_year(2026, 9, 12, &amharic_locale), Some(2019)); + assert_eq!(get_era_year(2026, 9, 10, &amharic_locale), Some(2018)); + } +} diff --git a/src/uucore/src/lib/features/i18n/decimal.rs b/src/uucore/src/lib/features/i18n/decimal.rs index 9fa2d8d7bc7..0a901143c6b 100644 --- a/src/uucore/src/lib/features/i18n/decimal.rs +++ b/src/uucore/src/lib/features/i18n/decimal.rs @@ -37,15 +37,47 @@ pub fn locale_decimal_separator() -> &'static str { DECIMAL_SEP.get_or_init(|| get_decimal_separator(get_numeric_locale().0.clone())) } +/// Return the grouping separator for the given locale +fn get_grouping_separator(loc: Locale) -> String { + let data_locale = DataLocale::from(loc); + + let request = DataRequest { + id: DataIdentifierBorrowed::for_locale(&data_locale), + metadata: DataRequestMetadata::default(), + }; + + let response: DataResponse = + icu_decimal::provider::Baked.load(request).unwrap(); + + response.payload.get().grouping_separator().to_string() +} + +/// Return the grouping separator from the language we're working with. +/// Example: +/// Say we need to format 1,000 +/// en_US: 1,000 -> grouping separator is ',' +/// fr_FR: 1 000 -> grouping separator is '\u{202f}' +pub fn locale_grouping_separator() -> &'static str { + static GROUPING_SEP: OnceLock = OnceLock::new(); + + GROUPING_SEP.get_or_init(|| get_grouping_separator(get_numeric_locale().0.clone())) +} + #[cfg(test)] mod tests { use icu_locale::locale; - use super::get_decimal_separator; + use super::{get_decimal_separator, get_grouping_separator}; #[test] - fn test_simple_separator() { + fn test_simple_decimal_separator() { assert_eq!(get_decimal_separator(locale!("en")), "."); assert_eq!(get_decimal_separator(locale!("fr")), ","); } + + #[test] + fn test_simple_grouping_separator() { + assert_eq!(get_grouping_separator(locale!("en")), ","); + assert_eq!(get_grouping_separator(locale!("fr")), "\u{202f}"); + } } diff --git a/src/uucore/src/lib/features/i18n/mod.rs b/src/uucore/src/lib/features/i18n/mod.rs index 79c804a033f..e8e0f3f3c5d 100644 --- a/src/uucore/src/lib/features/i18n/mod.rs +++ b/src/uucore/src/lib/features/i18n/mod.rs @@ -9,6 +9,8 @@ use icu_locale::{Locale, locale}; #[cfg(feature = "i18n-collator")] pub mod collator; +#[cfg(feature = "i18n-datetime")] +pub mod datetime; #[cfg(feature = "i18n-decimal")] pub mod decimal; @@ -31,7 +33,7 @@ const DEFAULT_LOCALE: Locale = locale!("und"); /// 3. LANG /// /// Or fallback on Posix locale, with ASCII encoding. -fn get_locale_from_env(locale_name: &str) -> (Locale, UEncoding) { +pub fn get_locale_from_env(locale_name: &str) -> (Locale, UEncoding) { let locale_var = ["LC_ALL", locale_name, "LANG"] .iter() .find_map(|&key| std::env::var(key).ok()); diff --git a/src/uucore/src/lib/features/process.rs b/src/uucore/src/lib/features/process.rs index 043d4850d9b..b19d4a752cc 100644 --- a/src/uucore/src/lib/features/process.rs +++ b/src/uucore/src/lib/features/process.rs @@ -105,11 +105,29 @@ impl ChildExt for Child { } fn send_signal_group(&mut self, signal: usize) -> io::Result<()> { - // Ignore the signal, so we don't go into a signal loop. - if unsafe { libc::signal(signal as i32, libc::SIG_IGN) } == usize::MAX { - return Err(io::Error::last_os_error()); + // Send signal to our process group (group 0 = caller's group). + // This matches GNU coreutils behavior: if the child has remained in our + // process group, it will receive this signal along with all other processes + // in the group. If the child has created its own process group (via setpgid), + // it won't receive this group signal, but will have received the direct signal. + + // Signal 0 is special - it just checks if process exists, doesn't send anything. + // No need to manipulate signal handlers for it. + if signal == 0 { + let result = unsafe { libc::kill(0, 0) }; + return if result == 0 { + Ok(()) + } else { + Err(io::Error::last_os_error()) + }; } - if unsafe { libc::kill(0, signal as i32) } == 0 { + + // Ignore the signal temporarily so we don't receive it ourselves. + let old_handler = unsafe { libc::signal(signal as i32, libc::SIG_IGN) }; + let result = unsafe { libc::kill(0, signal as i32) }; + // Restore the old handler + unsafe { libc::signal(signal as i32, old_handler) }; + if result == 0 { Ok(()) } else { Err(io::Error::last_os_error()) diff --git a/src/uucore/src/lib/features/signals.rs b/src/uucore/src/lib/features/signals.rs index 25b91d585d1..6d0956b39ca 100644 --- a/src/uucore/src/lib/features/signals.rs +++ b/src/uucore/src/lib/features/signals.rs @@ -410,7 +410,7 @@ pub fn signal_name_by_value(signal_value: usize) -> Option<&'static str> { ALL_SIGNALS.get(signal_value).copied() } -/// Returns the default signal value. +/// Restores SIGPIPE to default behavior (process terminates on broken pipe). #[cfg(unix)] pub fn enable_pipe_errors() -> Result<(), Errno> { // We pass the error as is, the return value would just be Ok(SigDfl), so we can safely ignore it. @@ -418,6 +418,15 @@ pub fn enable_pipe_errors() -> Result<(), Errno> { unsafe { signal(SIGPIPE, SigDfl) }.map(|_| ()) } +/// Ignores SIGPIPE signal (broken pipe errors are returned instead of terminating). +/// Use this to override the default SIGPIPE handling when you need to handle +/// broken pipe errors gracefully (e.g., tee with --output-error). +#[cfg(unix)] +pub fn disable_pipe_errors() -> Result<(), Errno> { + // SAFETY: this function is safe as long as we do not use a custom SigHandler -- we use the default one. + unsafe { signal(SIGPIPE, SigIgn) }.map(|_| ()) +} + /// Ignores the SIGINT signal. #[cfg(unix)] pub fn ignore_interrupts() -> Result<(), Errno> { diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index c1ece8bff35..228ca3eded6 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -122,7 +122,7 @@ pub use crate::features::fsext; #[cfg(all(unix, feature = "fsxattr"))] pub use crate::features::fsxattr; -#[cfg(all(target_os = "linux", feature = "selinux"))] +#[cfg(all(feature = "selinux", any(target_os = "linux", target_os = "android")))] pub use crate::features::selinux; #[cfg(all(target_os = "linux", feature = "smack"))] @@ -172,11 +172,6 @@ pub fn get_canonical_util_name(util_name: &str) -> &str { // uu_test aliases - '[' is an alias for test "[" => "test", - // hashsum aliases - all these hash commands are aliases for hashsum - "md5sum" | "sha1sum" | "sha224sum" | "sha256sum" | "sha384sum" | "sha512sum" | "b2sum" => { - "hashsum" - } - "dir" => "ls", // dir is an alias for ls // Default case - return the util name as is diff --git a/src/uucore/src/lib/mods/locale.rs b/src/uucore/src/lib/mods/locale.rs index ec9a78b433c..a6dad4c6285 100644 --- a/src/uucore/src/lib/mods/locale.rs +++ b/src/uucore/src/lib/mods/locale.rs @@ -156,6 +156,22 @@ fn create_bundle( // Then, try to load utility-specific strings from the utility's locale directory try_add_resource_from(get_locales_dir(util_name).ok()); + // checksum binaries also require fluent files from the checksum_common crate + if [ + "cksum", + "b2sum", + "md5sum", + "sha1sum", + "sha224sum", + "sha256sum", + "sha384sum", + "sha512sum", + ] + .contains(&util_name) + { + try_add_resource_from(get_locales_dir("checksum_common").ok()); + } + // If we have at least one resource, return the bundle if bundle.has_message("common-error") || bundle.has_message(&format!("{util_name}-about")) { Ok(bundle) diff --git a/src/uucore_procs/src/lib.rs b/src/uucore_procs/src/lib.rs index e60e2b822c7..c73f542a98b 100644 --- a/src/uucore_procs/src/lib.rs +++ b/src/uucore_procs/src/lib.rs @@ -16,14 +16,34 @@ use quote::quote; //* ref: [path construction from LitStr](https://oschwald.github.io/maxminddb-rust/syn/struct.LitStr.html) @@ /// A procedural macro to define the main function of a uutils binary. +/// +/// This macro handles: +/// - SIGPIPE state capture at process startup (before Rust runtime overrides it) +/// - SIGPIPE restoration to default if parent didn't explicitly ignore it +/// - Disabling Rust signal handlers for proper core dumps +/// - Error handling and exit code management #[proc_macro_attribute] pub fn main(_args: TokenStream, stream: TokenStream) -> TokenStream { let stream = proc_macro2::TokenStream::from(stream); let new = quote!( + // Initialize SIGPIPE state capture at process startup (Unix only). + // This must be at module level to set up the .init_array static that runs + // before main() to capture whether SIGPIPE was ignored by the parent process. + #[cfg(unix)] + uucore::init_startup_state_capture!(); + pub fn uumain(args: impl uucore::Args) -> i32 { #stream + // Restore SIGPIPE to default if it wasn't explicitly ignored by parent. + // The Rust runtime ignores SIGPIPE, but we need to respect the parent's + // signal disposition for proper pipeline behavior (GNU compatibility). + #[cfg(unix)] + if !uucore::signals::sigpipe_was_ignored() { + let _ = uucore::signals::enable_pipe_errors(); + } + // disable rust signal handlers (otherwise processes don't dump core after e.g. one SIGSEGV) #[cfg(unix)] uucore::disable_rust_signal_handlers().expect("Disabling rust signal handlers failed"); diff --git a/tests/by-util/test_b2sum.rs b/tests/by-util/test_b2sum.rs new file mode 100644 index 00000000000..30e2c46ca35 --- /dev/null +++ b/tests/by-util/test_b2sum.rs @@ -0,0 +1,299 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use rstest::rstest; + +use uutests::new_ucmd; +use uutests::util::TestScenario; +use uutests::util_name; +// spell-checker:ignore checkfile, testf, ntestf +macro_rules! get_hash( + ($str:expr) => ( + $str.split(' ').collect::>()[0] + ); +); + +macro_rules! test_digest_with_len { + ($id:ident, $size:expr) => { + mod $id { + use uutests::util::*; + use uutests::util_name; + static LENGTH_ARG: &'static str = concat!("--length=", stringify!($size)); + static EXPECTED_FILE: &'static str = concat!(stringify!($id), ".expected"); + static CHECK_FILE: &'static str = concat!(stringify!($id), ".checkfile"); + static INPUT_FILE: &'static str = "input.txt"; + + #[test] + fn test_single_file() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg(LENGTH_ARG) + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_stdin() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg(LENGTH_ARG) + .pipe_in_fixture(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_check() { + let ts = TestScenario::new(util_name!()); + println!("File content='{}'", ts.fixtures.read(INPUT_FILE)); + println!("Check file='{}'", ts.fixtures.read(CHECK_FILE)); + + ts.ucmd() + .args(&[LENGTH_ARG, "--check", CHECK_FILE]) + .succeeds() + .no_stderr() + .stdout_is("input.txt: OK\n"); + } + + #[test] + fn test_zero() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg(LENGTH_ARG) + .arg("--zero") + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_missing_file() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.write("a", "file1\n"); + at.write("c", "file3\n"); + + ts.ucmd() + .args(&[LENGTH_ARG, "a", "b", "c"]) + .fails() + .stdout_contains("a\n") + .stdout_contains("c\n") + .stderr_contains("b: No such file or directory"); + } + } + }; +} + +test_digest_with_len! {b2sum, 512} + +#[test] +fn test_check_b2sum_length_option_0() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("testf", "foobar\n"); + at.write("testf.b2sum", "9e2bf63e933e610efee4a8d6cd4a9387e80860edee97e27db3b37a828d226ab1eb92a9cdd8ca9ca67a753edaf8bd89a0558496f67a30af6f766943839acf0110 testf\n"); + + scene + .ccmd("b2sum") + .arg("--length=0") + .arg("-c") + .arg(at.subdir.join("testf.b2sum")) + .succeeds() + .stdout_only("testf: OK\n"); +} + +#[test] +fn test_check_b2sum_length_duplicate() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("testf", "foobar\n"); + + scene + .ccmd("b2sum") + .arg("--length=123") + .arg("--length=128") + .arg("testf") + .succeeds() + .stdout_contains("d6d45901dec53e65d2b55fb6e2ab67b0"); +} + +#[test] +fn test_check_b2sum_length_option_8() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("testf", "foobar\n"); + at.write("testf.b2sum", "6a testf\n"); + + scene + .ccmd("b2sum") + .arg("--length=8") + .arg("-c") + .arg(at.subdir.join("testf.b2sum")) + .succeeds() + .stdout_only("testf: OK\n"); +} + +#[test] +fn test_invalid_b2sum_length_option_not_multiple_of_8() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("testf", "foobar\n"); + + scene + .ccmd("b2sum") + .arg("--length=9") + .arg(at.subdir.join("testf")) + .fails_with_code(1) + .stderr_contains("b2sum: invalid length: '9'") + .stderr_contains("b2sum: length is not a multiple of 8"); +} + +#[rstest] +#[case("513")] +#[case("1024")] +#[case("18446744073709552000")] +fn test_invalid_b2sum_length_option_too_large(#[case] len: &str) { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("testf", "foobar\n"); + + scene + .ccmd("b2sum") + .arg("--length") + .arg(len) + .arg(at.subdir.join("testf")) + .fails_with_code(1) + .no_stdout() + .stderr_contains(format!("b2sum: invalid length: '{len}'")) + .stderr_contains("b2sum: maximum digest length for 'BLAKE2b' is 512 bits"); +} + +#[test] +fn test_check_b2sum_tag_output() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + + scene + .ccmd("b2sum") + .arg("--length=0") + .arg("--tag") + .arg("f") + .succeeds() + .stdout_only("BLAKE2b (f) = 786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce\n"); + + scene + .ccmd("b2sum") + .arg("--length=128") + .arg("--tag") + .arg("f") + .succeeds() + .stdout_only("BLAKE2b-128 (f) = cae66941d9efbd404e4d88758ea67670\n"); +} + +#[test] +fn test_check_b2sum_verify() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("a", "a\n"); + + scene + .ccmd("b2sum") + .arg("--tag") + .arg("a") + .succeeds() + .stdout_only("BLAKE2b (a) = bedfbb90d858c2d67b7ee8f7523be3d3b54004ef9e4f02f2ad79a1d05bfdfe49b81e3c92ebf99b504102b6bf003fa342587f5b3124c205f55204e8c4b4ce7d7c\n"); + + scene + .ccmd("b2sum") + .arg("--tag") + .arg("-l") + .arg("128") + .arg("a") + .succeeds() + .stdout_only("BLAKE2b-128 (a) = b93e0fc7bb21633c08bba07c5e71dc00\n"); +} + +#[test] +fn test_invalid_arg() { + new_ucmd!().arg("--definitely-invalid").fails_with_code(1); +} + +#[test] +fn test_check_b2sum_strict_check() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch("f"); + + let checksums = [ + "2e f\n", + "e4a6a0577479b2b4 f\n", + "cae66941d9efbd404e4d88758ea67670 f\n", + "246c0442cd564aced8145b8b60f1370aa7 f\n", + "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8 f\n", + "4ded8c5fc8b12f3273f877ca585a44ad6503249a2b345d6d9c0e67d85bcb700db4178c0303e93b8f4ad758b8e2c9fd8b3d0c28e585f1928334bb77d36782e8 f\n", + "786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce f\n", + ]; + + at.write("ck", &checksums.join("")); + + let output = "f: OK\n".to_string().repeat(checksums.len()); + + scene + .ccmd("b2sum") + .arg("-c") + .arg(at.subdir.join("ck")) + .succeeds() + .stdout_only(&output); + + scene + .ccmd("b2sum") + .arg("--strict") + .arg("-c") + .arg(at.subdir.join("ck")) + .succeeds() + .stdout_only(&output); +} + +#[test] +fn test_help_shows_correct_utility_name() { + // Test that help output shows the actual utility name instead of "hashsum" + let scene = TestScenario::new(util_name!()); + + // Test b2sum + scene + .ccmd("b2sum") + .arg("--help") + .succeeds() + .stdout_contains("Usage: b2sum") + .stdout_does_not_contain("Usage: hashsum"); +} diff --git a/tests/by-util/test_base64.rs b/tests/by-util/test_base64.rs index f3657bb779d..8b558f1a188 100644 --- a/tests/by-util/test_base64.rs +++ b/tests/by-util/test_base64.rs @@ -265,3 +265,12 @@ cyBvdmVyIHRoZSBsYXp5IGRvZy4= // cSpell:enable ); } + +#[test] +#[cfg(all(target_os = "linux", not(target_env = "musl")))] +fn test_read_error() { + new_ucmd!() + .arg("/proc/self/mem") + .fails() + .stderr_is("base64: read error: Input/output error\n"); +} diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 336d07a1c87..1034dfdfc54 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // -// spell-checker: ignore: AEDT AEST EEST NZDT NZST Kolkata Iseconds +// spell-checker: ignore: AEDT AEST EEST NZDT NZST Kolkata Iseconds févr février janv janvier mercredi samedi sommes use std::cmp::Ordering; @@ -1470,6 +1470,99 @@ fn test_date_posix_format_specifiers() { } } +#[test] +#[cfg(any(target_os = "linux", target_vendor = "apple"))] +fn test_date_format_b_french_locale() { + // Test both %B and %b formats with French locale using a loop + // This test expects localized month names when i18n support is available + let test_cases = [ + ("2025-01-15", "janvier", "janv."), // Wednesday = mercredi, mer. + ("2025-02-15", "février", "févr."), // Saturday = samedi, sam. + ]; + + for (date, expected_full, expected_abbrev) in &test_cases { + let result = new_ucmd!() + .env("LC_TIME", "fr_FR.UTF-8") + .env("TZ", "UTC") + .arg("-d") + .arg(date) + .arg("+%B %b") + .succeeds(); + + let output = result.stdout_str().trim(); + let expected = format!("{expected_full} {expected_abbrev}"); + + if output == expected { + // i18n feature is working - test passed + assert_eq!(output, expected); + } else { + // i18n feature not available, skip test + println!( + "Skipping French locale test for {date} - i18n feature not available, got: {output}" + ); + return; // Exit early if i18n not available + } + } +} + +#[test] +#[cfg(any(target_os = "linux", target_vendor = "apple"))] +fn test_date_format_a_french_locale() { + // Test both %A and %a formats with French locale using a loop + // This test expects localized day names when i18n support is available + let test_cases = [ + ("2025-01-15", "mercredi", "mer."), // Wednesday + ("2025-02-15", "samedi", "sam."), // Saturday + ]; + + for (date, expected_full, expected_abbrev) in &test_cases { + let result = new_ucmd!() + .env("LC_TIME", "fr_FR.UTF-8") + .env("TZ", "UTC") + .arg("-d") + .arg(date) + .arg("+%A %a") + .succeeds(); + + let output = result.stdout_str().trim(); + let expected = format!("{expected_full} {expected_abbrev}"); + + if output == expected { + // i18n feature is working - test passed + assert_eq!(output, expected); + } else { + // i18n feature not available, skip test + println!( + "Skipping French day locale test for {date} - i18n feature not available, got: {output}" + ); + return; // Exit early if i18n not available + } + } +} + +#[test] +#[cfg(any(target_os = "linux", target_vendor = "apple"))] +fn test_date_french_full_sentence() { + let result = new_ucmd!() + .env("LANG", "fr_FR.UTF-8") + .env("TZ", "UTC") + .arg("-d") + .arg("2026-01-21") + .arg("+Nous sommes le %A %d %B %Y") + .succeeds(); + + let output = result.stdout_str().trim(); + let expected = "Nous sommes le mercredi 21 janvier 2026"; + + if output == expected { + // i18n feature is working - test passed + assert_eq!(output, expected); + } else { + // i18n feature not available, skip test + println!("Skipping French full sentence test - i18n feature not available, got: {output}"); + } +} + /// Test that %x format specifier respects locale settings /// This is a regression test for locale-aware date formatting #[test] @@ -1497,3 +1590,279 @@ fn test_date_format_x_locale_aware() { .succeeds() .stdout_is("19/01/1997\n"); } + +#[test] +fn test_date_parenthesis_comment() { + // GNU compatibility: Text in parentheses is treated as a comment and removed. + let cases = [ + // (input, format, expected_output) + ("(", "+%H:%M:%S", "00:00:00\n"), + ("1(ignore comment to eol", "+%H:%M:%S", "01:00:00\n"), + ("2026-01-05(this is a comment", "+%Y-%m-%d", "2026-01-05\n"), + ("2026(this is a comment)-01-05", "+%Y-%m-%d", "2026-01-05\n"), + ("((foo)2026-01-05)", "+%H:%M:%S", "00:00:00\n"), // Nested/unbalanced case + ("(2026-01-05(foo))", "+%H:%M:%S", "00:00:00\n"), // Balanced parentheses removed (empty result) + ]; + + for (input, format, expected) in cases { + new_ucmd!() + .env("TZ", "UTC") + .arg("-d") + .arg(input) + .arg("-u") + .arg(format) + .succeeds() + .stdout_only(expected); + } +} + +#[test] +fn test_date_parenthesis_vs_other_special_chars() { + // Ensure parentheses are special but other chars like [, ., ^ are still rejected + for special_char in ["[", ".", "^"] { + new_ucmd!() + .arg("-d") + .arg(special_char) + .fails() + .stderr_contains("invalid date"); + } +} + +#[test] +#[cfg(unix)] +fn test_date_iranian_locale_solar_hijri_calendar() { + // Test Iranian locale uses Solar Hijri calendar + // Verify the Solar Hijri calendar is used in the Iranian locale + use std::process::Command; + + // Check if Iranian locale is available + let locale_check = Command::new("locale") + .env("LC_ALL", "fa_IR.UTF-8") + .arg("charmap") + .output(); + + let locale_available = match locale_check { + Ok(output) => String::from_utf8_lossy(&output.stdout).trim() == "UTF-8", + Err(_) => false, + }; + + if !locale_available { + println!("Skipping Iranian locale test - fa_IR.UTF-8 locale not available"); + return; + } + + // Get current year in Gregorian calendar + let current_year: i32 = new_ucmd!() + .env("LC_ALL", "C") + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // 03-19 and 03-22 of the same Gregorian year are in different years in the + // Solar Hijri calendar + let year_march_19: i32 = new_ucmd!() + .env("LC_ALL", "fa_IR.UTF-8") + .arg("-d") + .arg(format!("{current_year}-03-19")) + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + let year_march_22: i32 = new_ucmd!() + .env("LC_ALL", "fa_IR.UTF-8") + .arg("-d") + .arg(format!("{current_year}-03-22")) + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // Years should differ by 1 + assert_eq!(year_march_19, year_march_22 - 1); + + // The difference between the Gregorian year is 621 or 622 years + assert_eq!(year_march_19, current_year - 622); + assert_eq!(year_march_22, current_year - 621); + + // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar + let iso_result = new_ucmd!() + .env("LC_ALL", "fa_IR.UTF-8") + .arg("--iso-8601=hours") + .succeeds(); + let iso_output = iso_result.stdout_str(); + assert!(iso_output.starts_with(¤t_year.to_string())); + + let rfc_result = new_ucmd!() + .env("LC_ALL", "fa_IR.UTF-8") + .arg("--rfc-3339=date") + .succeeds(); + let rfc_output = rfc_result.stdout_str(); + assert!(rfc_output.starts_with(¤t_year.to_string())); +} + +#[test] +#[cfg(unix)] +fn test_date_ethiopian_locale_calendar() { + // Test Ethiopian locale uses Ethiopian calendar + // Verify the Ethiopian calendar is used in the Ethiopian locale + use std::process::Command; + + // Check if Ethiopian locale is available + let locale_check = Command::new("locale") + .env("LC_ALL", "am_ET.UTF-8") + .arg("charmap") + .output(); + + let locale_available = match locale_check { + Ok(output) => String::from_utf8_lossy(&output.stdout).trim() == "UTF-8", + Err(_) => false, + }; + + if !locale_available { + println!("Skipping Ethiopian locale test - am_ET.UTF-8 locale not available"); + return; + } + + // Get current year in Gregorian calendar + let current_year: i32 = new_ucmd!() + .env("LC_ALL", "C") + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // 09-10 and 09-12 of the same Gregorian year are in different years in the + // Ethiopian calendar + let year_september_10: i32 = new_ucmd!() + .env("LC_ALL", "am_ET.UTF-8") + .arg("-d") + .arg(format!("{current_year}-09-10")) + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + let year_september_12: i32 = new_ucmd!() + .env("LC_ALL", "am_ET.UTF-8") + .arg("-d") + .arg(format!("{current_year}-09-12")) + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // Years should differ by 1 + assert_eq!(year_september_10, year_september_12 - 1); + + // The difference between the Gregorian year is 7 or 8 years + assert_eq!(year_september_10, current_year - 8); + assert_eq!(year_september_12, current_year - 7); + + // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar + let iso_result = new_ucmd!() + .env("LC_ALL", "am_ET.UTF-8") + .arg("--iso-8601=hours") + .succeeds(); + let iso_output = iso_result.stdout_str(); + assert!(iso_output.starts_with(¤t_year.to_string())); + + let rfc_result = new_ucmd!() + .env("LC_ALL", "am_ET.UTF-8") + .arg("--rfc-3339=date") + .succeeds(); + let rfc_output = rfc_result.stdout_str(); + assert!(rfc_output.starts_with(¤t_year.to_string())); +} + +#[test] +#[cfg(unix)] +fn test_date_thai_locale_solar_calendar() { + // Test Thai locale uses Thai solar calendar + // Verify the Thai solar calendar is used with the Thai locale + use std::process::Command; + + // Check if Thai locale is available + let locale_check = Command::new("locale") + .env("LC_ALL", "th_TH.UTF-8") + .arg("charmap") + .output(); + + let locale_available = match locale_check { + Ok(output) => String::from_utf8_lossy(&output.stdout).trim() == "UTF-8", + Err(_) => false, + }; + + if !locale_available { + println!("Skipping Thai locale test - th_TH.UTF-8 locale not available"); + return; + } + + // Get current year in Gregorian calendar + let current_year: i32 = new_ucmd!() + .env("LC_ALL", "C") + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // Since 1941, the year in the Thai solar calendar is the Gregorian year plus 543 + let thai_year: i32 = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + assert_eq!(thai_year, current_year + 543); + + // All months that have 31 days have names that end with "คม" (Thai characters) + let days_31_suffix = "\u{0E04}\u{0E21}"; // "คม" in Unicode + + for month in ["01", "03", "05", "07", "08", "10", "12"] { + let month_result = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("--date") + .arg(format!("{current_year}-{month}-01")) + .arg("+%B") + .succeeds(); + let month_name = month_result.stdout_str(); + + assert!( + month_name.trim().ends_with(days_31_suffix), + "Month {month} should end with 'คม', got: {month_name}" + ); + } + + // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar + let iso_result = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("--iso-8601=hours") + .succeeds(); + let iso_output = iso_result.stdout_str(); + assert!(iso_output.starts_with(¤t_year.to_string())); + + let rfc_result = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("--rfc-3339=date") + .succeeds(); + let rfc_output = rfc_result.stdout_str(); + assert!(rfc_output.starts_with(¤t_year.to_string())); +} diff --git a/tests/by-util/test_expand.rs b/tests/by-util/test_expand.rs index 741aad36640..78a0f6ae5f5 100644 --- a/tests/by-util/test_expand.rs +++ b/tests/by-util/test_expand.rs @@ -427,6 +427,15 @@ fn test_nonexisting_file() { .stdout_contains_line("// !note: file contains significant whitespace"); } +#[test] +#[cfg(all(target_os = "linux", not(target_env = "musl")))] +fn test_read_error() { + new_ucmd!() + .arg("/proc/self/mem") + .fails() + .stderr_contains("expand: /proc/self/mem: Input/output error"); +} + #[test] #[cfg(target_os = "linux")] fn test_expand_non_utf8_paths() { diff --git a/tests/by-util/test_fold.rs b/tests/by-util/test_fold.rs index 9497044c910..1fe466ba5ad 100644 --- a/tests/by-util/test_fold.rs +++ b/tests/by-util/test_fold.rs @@ -4,7 +4,11 @@ // file that was distributed with this source code. // spell-checker:ignore fullwidth +use bytecount::count; +use unicode_width::UnicodeWidthChar; use uutests::new_ucmd; +use uutests::util::TestScenario; +use uutests::util_name; #[test] fn test_invalid_arg() { @@ -61,6 +65,301 @@ fn test_wide_characters_with_characters_option() { .stdout_is("\u{B250}\u{B250}\u{B250}\n"); } +#[test] +fn test_multiple_wide_characters_in_column_mode() { + let wide = '\u{FF1A}'; + let mut input = wide.to_string().repeat(50); + input.push('\n'); + + let mut expected = String::new(); + for i in 1..=50 { + expected.push(wide); + if i % 5 == 0 { + expected.push('\n'); + } + } + + new_ucmd!() + .args(&["-w", "10"]) + .pipe_in(input) + .succeeds() + .stdout_is(expected); +} + +#[test] +fn test_multiple_wide_characters_in_character_mode() { + let wide = '\u{FF1A}'; + let mut input = wide.to_string().repeat(50); + input.push('\n'); + + let mut expected = String::new(); + for i in 1..=50 { + expected.push(wide); + if i % 10 == 0 { + expected.push('\n'); + } + } + + new_ucmd!() + .args(&["--characters", "-w", "10"]) + .pipe_in(input) + .succeeds() + .stdout_is(expected); +} + +#[test] +fn test_unicode_on_reader_buffer_boundary_in_character_mode() { + let boundary = buf_reader_capacity().saturating_sub(1); + assert!(boundary > 0, "BufReader capacity must be greater than 1"); + + let mut input = "a".repeat(boundary); + input.push('\u{B250}'); + input.push_str(&"a".repeat(100)); + input.push('\n'); + + let expected_tail = tail_inclusive(&fold_characters_reference(&input, 80), 4); + + let result = new_ucmd!().arg("--characters").pipe_in(input).succeeds(); + + let actual_tail = tail_inclusive(result.stdout_str(), 4); + + assert_eq!(actual_tail, expected_tail); +} + +#[test] +fn test_fold_preserves_invalid_utf8_sequences() { + let bad_input: &[u8] = b"\xC3|\xED\xBA\xAD|\x00|\x89|\xED\xA6\xBF\xED\xBF\xBF\n"; + + new_ucmd!() + .pipe_in(bad_input.to_vec()) + .succeeds() + .stdout_is_bytes(bad_input); +} + +#[test] +fn test_fold_preserves_incomplete_utf8_at_eof() { + let trailing_byte: &[u8] = b"\xC3"; + + new_ucmd!() + .pipe_in(trailing_byte.to_vec()) + .succeeds() + .stdout_is_bytes(trailing_byte); +} + +#[test] +fn test_zero_width_bytes_in_column_mode() { + let len = io_buf_size_times_two(); + let input = vec![0u8; len]; + + new_ucmd!() + .pipe_in(input.clone()) + .succeeds() + .stdout_is_bytes(input); +} + +#[test] +fn test_zero_width_bytes_in_character_mode() { + let len = io_buf_size_times_two(); + let input = vec![0u8; len]; + let expected = fold_characters_reference_bytes(&input, 80); + + new_ucmd!() + .args(&["--characters"]) + .pipe_in(input) + .succeeds() + .stdout_is_bytes(expected); +} + +#[test] +fn test_zero_width_spaces_in_column_mode() { + let len = io_buf_size_times_two(); + let input = "\u{200B}".repeat(len); + + new_ucmd!() + .pipe_in(input.clone()) + .succeeds() + .stdout_is(&input); +} + +#[test] +fn test_zero_width_spaces_in_character_mode() { + let len = io_buf_size_times_two(); + let input = "\u{200B}".repeat(len); + let expected = fold_characters_reference(&input, 80); + + new_ucmd!() + .args(&["--characters"]) + .pipe_in(input) + .succeeds() + .stdout_is(&expected); +} + +#[test] +fn test_zero_width_bytes_from_file() { + let len = io_buf_size_times_two(); + let input = vec![0u8; len]; + let expected = fold_characters_reference_bytes(&input, 80); + + let ts = TestScenario::new(util_name!()); + let path = "zeros.bin"; + ts.fixtures.write_bytes(path, &input); + + ts.ucmd().arg(path).succeeds().stdout_is_bytes(&input); + + ts.ucmd() + .args(&["--characters", path]) + .succeeds() + .stdout_is_bytes(expected); +} + +#[test] +fn test_zero_width_spaces_from_file() { + let len = io_buf_size_times_two(); + let input = "\u{200B}".repeat(len); + let expected = fold_characters_reference(&input, 80); + + let ts = TestScenario::new(util_name!()); + let path = "zero-width.txt"; + ts.fixtures.write(path, &input); + + ts.ucmd().arg(path).succeeds().stdout_is(&input); + + ts.ucmd() + .args(&["--characters", path]) + .succeeds() + .stdout_is(&expected); +} + +#[test] +fn test_zero_width_data_line_counts() { + let len = io_buf_size_times_two(); + + let zero_bytes = vec![0u8; len]; + let column_bytes = new_ucmd!().pipe_in(zero_bytes.clone()).succeeds(); + assert_eq!( + newline_count(column_bytes.stdout()), + 0, + "fold should not wrap zero-width bytes in column mode", + ); + + let characters_bytes = new_ucmd!() + .args(&["--characters"]) + .pipe_in(zero_bytes) + .succeeds(); + assert_eq!( + newline_count(characters_bytes.stdout()), + len / 80, + "fold --characters should wrap zero-width bytes every 80 bytes", + ); + + if UnicodeWidthChar::width('\u{200B}') != Some(0) { + eprintln!("skip zero width space checks because width != 0"); + return; + } + + let zero_width_spaces = "\u{200B}".repeat(len); + let column_spaces = new_ucmd!().pipe_in(zero_width_spaces.clone()).succeeds(); + assert_eq!( + newline_count(column_spaces.stdout()), + 0, + "fold should keep zero-width spaces on a single line in column mode", + ); + + let characters_spaces = new_ucmd!() + .args(&["--characters"]) + .pipe_in(zero_width_spaces) + .succeeds(); + assert_eq!( + newline_count(characters_spaces.stdout()), + len / 80, + "fold --characters should wrap zero-width spaces every 80 characters", + ); +} + +#[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "netbsd"))] +#[test] +fn test_fold_reports_no_space_left_on_dev_full() { + use std::fs::OpenOptions; + use std::process::Stdio; + + for &byte in &[b'\n', b'\0', 0xC3u8] { + let dev_full = OpenOptions::new() + .write(true) + .open("/dev/full") + .expect("/dev/full must exist on supported targets"); + + new_ucmd!() + .pipe_in(vec![byte; 1024]) + .set_stdout(Stdio::from(dev_full)) + .fails() + .stderr_contains("No space left"); + } +} + +fn buf_reader_capacity() -> usize { + std::io::BufReader::new(&b""[..]).capacity() +} + +fn io_buf_size_times_two() -> usize { + buf_reader_capacity() + .checked_mul(2) + .expect("BufReader capacity overflow") +} + +fn fold_characters_reference(input: &str, width: usize) -> String { + let mut output = String::with_capacity(input.len()); + let mut col_count = 0usize; + + for ch in input.chars() { + if ch == '\n' { + output.push('\n'); + col_count = 0; + continue; + } + + if col_count >= width { + output.push('\n'); + col_count = 0; + } + + output.push(ch); + col_count += 1; + } + + output +} + +fn fold_characters_reference_bytes(input: &[u8], width: usize) -> Vec { + let mut output = Vec::with_capacity(input.len() + input.len() / width + 1); + + for chunk in input.chunks(width) { + output.extend_from_slice(chunk); + if chunk.len() == width { + output.push(b'\n'); + } + } + + output +} + +fn newline_count(bytes: &[u8]) -> usize { + count(bytes, b'\n') +} + +fn tail_inclusive(text: &str, lines: usize) -> String { + if lines == 0 { + return String::new(); + } + + let segments: Vec<&str> = text.split_inclusive('\n').collect(); + if segments.is_empty() { + return text.to_owned(); + } + + let start = segments.len().saturating_sub(lines); + segments[start..].concat() +} + #[test] fn test_should_preserve_empty_line_without_final_newline() { new_ucmd!() diff --git a/tests/by-util/test_hashsum.rs b/tests/by-util/test_hashsum.rs index 2f1719b0eca..c139469d66c 100644 --- a/tests/by-util/test_hashsum.rs +++ b/tests/by-util/test_hashsum.rs @@ -201,22 +201,16 @@ macro_rules! test_digest_with_len { }; } -test_digest! {md5, md5} -test_digest! {sha1, sha1} test_digest! {b3sum, b3sum} test_digest! {shake128, shake128} test_digest! {shake256, shake256} -test_digest_with_len! {sha224, sha224, 224} -test_digest_with_len! {sha256, sha256, 256} -test_digest_with_len! {sha384, sha384, 384} -test_digest_with_len! {sha512, sha512, 512} test_digest_with_len! {sha3_224, sha3, 224} test_digest_with_len! {sha3_256, sha3, 256} test_digest_with_len! {sha3_384, sha3, 384} test_digest_with_len! {sha3_512, sha3, 512} -test_digest_with_len! {b2sum, b2sum, 512} +#[ignore = "moved to standalone"] #[test] fn test_check_sha1() { // To make sure that #3815 doesn't happen again @@ -237,6 +231,7 @@ fn test_check_sha1() { .stderr_is(""); } +#[ignore = "moved to standalone"] #[test] fn test_check_md5_ignore_missing() { let scene = TestScenario::new(util_name!()); @@ -271,6 +266,7 @@ fn test_check_md5_ignore_missing() { .stderr_contains("the --ignore-missing option is meaningful only when verifying checksums"); } +#[ignore = "moved to standalone"] #[test] fn test_check_b2sum_length_option_0() { let scene = TestScenario::new(util_name!()); @@ -288,6 +284,7 @@ fn test_check_b2sum_length_option_0() { .stdout_only("testf: OK\n"); } +#[ignore = "moved to standalone"] #[test] fn test_check_b2sum_length_duplicate() { let scene = TestScenario::new(util_name!()); @@ -304,6 +301,7 @@ fn test_check_b2sum_length_duplicate() { .stdout_contains("d6d45901dec53e65d2b55fb6e2ab67b0"); } +#[ignore = "moved to standalone"] #[test] fn test_check_b2sum_length_option_8() { let scene = TestScenario::new(util_name!()); @@ -321,6 +319,7 @@ fn test_check_b2sum_length_option_8() { .stdout_only("testf: OK\n"); } +#[ignore = "moved to standalone"] #[test] fn test_invalid_b2sum_length_option_not_multiple_of_8() { let scene = TestScenario::new(util_name!()); @@ -338,8 +337,11 @@ fn test_invalid_b2sum_length_option_not_multiple_of_8() { } #[rstest] +#[ignore = "moved to standalone"] #[case("513")] +#[ignore = "moved to standalone"] #[case("1024")] +#[ignore = "moved to standalone"] #[case("18446744073709552000")] fn test_invalid_b2sum_length_option_too_large(#[case] len: &str) { let scene = TestScenario::new(util_name!()); @@ -358,6 +360,7 @@ fn test_invalid_b2sum_length_option_too_large(#[case] len: &str) { .stderr_contains("b2sum: maximum digest length for 'BLAKE2b' is 512 bits"); } +#[ignore = "moved to standalone"] #[test] fn test_check_b2sum_tag_output() { let scene = TestScenario::new(util_name!()); @@ -382,6 +385,7 @@ fn test_check_b2sum_tag_output() { .stdout_only("BLAKE2b-128 (f) = cae66941d9efbd404e4d88758ea67670\n"); } +#[ignore = "moved to standalone"] #[test] fn test_check_b2sum_verify() { let scene = TestScenario::new(util_name!()); @@ -406,6 +410,7 @@ fn test_check_b2sum_verify() { .stdout_only("BLAKE2b-128 (a) = b93e0fc7bb21633c08bba07c5e71dc00\n"); } +#[ignore = "moved to standalone"] #[test] fn test_check_file_not_found_warning() { let scene = TestScenario::new(util_name!()); @@ -428,6 +433,7 @@ fn test_check_file_not_found_warning() { // Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace. // ref: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions +#[ignore = "moved to standalone"] #[test] fn test_check_md5sum() { let scene = TestScenario::new(util_name!()); @@ -478,6 +484,7 @@ fn test_check_md5sum() { } // GNU also supports one line sep +#[ignore = "moved to standalone"] #[test] fn test_check_md5sum_only_one_space() { let scene = TestScenario::new(util_name!()); @@ -501,6 +508,7 @@ fn test_check_md5sum_only_one_space() { .stdout_only("a: OK\n b: OK\nc: OK\n"); } +#[ignore = "moved to standalone"] #[test] fn test_check_md5sum_reverse_bsd() { let scene = TestScenario::new(util_name!()); @@ -550,6 +558,7 @@ fn test_check_md5sum_reverse_bsd() { } } +#[ignore = "moved to standalone"] #[test] fn test_check_md5sum_mixed_format() { let scene = TestScenario::new(util_name!()); @@ -606,6 +615,7 @@ fn test_conflicting_arg() { .fails_with_code(1); } +#[ignore = "moved to standalone"] #[test] fn test_tag() { let scene = TestScenario::new(util_name!()); @@ -622,6 +632,7 @@ fn test_tag() { ); } +#[ignore = "moved to standalone"] #[test] #[cfg(not(windows))] fn test_with_escape_filename() { @@ -637,6 +648,7 @@ fn test_with_escape_filename() { assert!(stdout.trim().ends_with("a\\nb")); } +#[ignore = "moved to standalone"] #[test] #[cfg(not(windows))] fn test_with_escape_filename_zero_text() { @@ -657,6 +669,7 @@ fn test_with_escape_filename_zero_text() { assert!(stdout.contains("a\nb")); } +#[ignore = "moved to standalone"] #[test] fn test_check_empty_line() { let scene = TestScenario::new(util_name!()); @@ -675,6 +688,7 @@ fn test_check_empty_line() { .stderr_contains("WARNING: 1 line is improperly formatted"); } +#[ignore = "moved to standalone"] #[test] #[cfg(not(windows))] fn test_check_with_escape_filename() { @@ -699,6 +713,7 @@ fn test_check_with_escape_filename() { result.stdout_is("\\a\\nb: OK\n"); } +#[ignore = "moved to standalone"] #[test] fn test_check_strict_error() { let scene = TestScenario::new(util_name!()); @@ -718,6 +733,7 @@ fn test_check_strict_error() { .stderr_contains("WARNING: 3 lines are improperly formatted"); } +#[ignore = "moved to standalone"] #[test] fn test_check_warn() { let scene = TestScenario::new(util_name!()); @@ -746,6 +762,7 @@ fn test_check_warn() { .fails(); } +#[ignore = "moved to standalone"] #[test] fn test_check_status() { let scene = TestScenario::new(util_name!()); @@ -762,6 +779,7 @@ fn test_check_status() { .no_output(); } +#[ignore = "moved to standalone"] #[test] fn test_check_status_code() { let scene = TestScenario::new(util_name!()); @@ -779,6 +797,7 @@ fn test_check_status_code() { .stdout_is(""); } +#[ignore = "moved to standalone"] #[test] fn test_sha1_with_md5sum_should_fail() { let scene = TestScenario::new(util_name!()); @@ -795,6 +814,7 @@ fn test_sha1_with_md5sum_should_fail() { .stderr_does_not_contain("WARNING: 1 line is improperly formatted"); } +#[ignore = "moved to standalone"] #[test] // Disabled on Windows because of the "*" #[cfg(not(windows))] @@ -834,6 +854,7 @@ fn test_check_one_two_space_star() { .stdout_is("*empty: OK\n"); } +#[ignore = "moved to standalone"] #[test] // Disabled on Windows because of the "*" #[cfg(not(windows))] @@ -876,6 +897,7 @@ fn test_check_space_star_or_not() { .stderr_contains("WARNING: 1 line is improperly formatted"); } +#[ignore = "moved to standalone"] #[test] fn test_check_no_backslash_no_space() { let scene = TestScenario::new(util_name!()); @@ -891,6 +913,7 @@ fn test_check_no_backslash_no_space() { .stdout_is("f: OK\n"); } +#[ignore = "moved to standalone"] #[test] fn test_incomplete_format() { let scene = TestScenario::new(util_name!()); @@ -906,6 +929,7 @@ fn test_incomplete_format() { .stderr_contains("no properly formatted checksum lines found"); } +#[ignore = "moved to standalone"] #[test] fn test_start_error() { let scene = TestScenario::new(util_name!()); @@ -923,6 +947,7 @@ fn test_start_error() { .stderr_contains("WARNING: 1 line is improperly formatted"); } +#[ignore = "moved to standalone"] #[test] fn test_check_check_ignore_no_file() { let scene = TestScenario::new(util_name!()); @@ -939,6 +964,7 @@ fn test_check_check_ignore_no_file() { .stderr_contains("in.md5: no file was verified"); } +#[ignore = "moved to standalone"] #[test] fn test_check_directory_error() { let scene = TestScenario::new(util_name!()); @@ -958,6 +984,7 @@ fn test_check_directory_error() { .stderr_contains(err_msg); } +#[ignore = "moved to standalone"] #[test] #[cfg(not(windows))] fn test_continue_after_directory_error() { @@ -990,6 +1017,7 @@ fn test_continue_after_directory_error() { .stderr_is(err_msg); } +#[ignore = "moved to standalone"] #[test] fn test_check_quiet() { let scene = TestScenario::new(util_name!()); @@ -1030,6 +1058,7 @@ fn test_check_quiet() { .stderr_contains("md5sum: the --strict option is meaningful only when verifying checksums"); } +#[ignore = "moved to standalone"] #[test] fn test_star_to_start() { let scene = TestScenario::new(util_name!()); @@ -1045,6 +1074,7 @@ fn test_star_to_start() { .stdout_only("f: OK\n"); } +#[ignore = "moved to standalone"] #[test] fn test_check_b2sum_strict_check() { let scene = TestScenario::new(util_name!()); @@ -1081,6 +1111,7 @@ fn test_check_b2sum_strict_check() { .stdout_only(&output); } +#[ignore = "moved to standalone"] #[test] fn test_check_md5_comment_line() { // A comment in a checksum file shall be discarded unnoticed. @@ -1106,6 +1137,7 @@ fn test_check_md5_comment_line() { .no_stderr(); } +#[ignore = "moved to standalone"] #[test] fn test_check_md5_comment_only() { // A file only filled with comments is equivalent to an empty file, @@ -1125,6 +1157,7 @@ fn test_check_md5_comment_only() { .stderr_contains("no properly formatted checksum lines found"); } +#[ignore = "moved to standalone"] #[test] fn test_check_md5_comment_leading_space() { // A file only filled with comments is equivalent to an empty file, @@ -1149,6 +1182,7 @@ fn test_check_md5_comment_leading_space() { .stderr_contains("WARNING: 1 line is improperly formatted"); } +#[ignore = "moved to standalone"] #[test] fn test_sha256_binary() { let ts = TestScenario::new(util_name!()); @@ -1165,6 +1199,7 @@ fn test_sha256_binary() { ); } +#[ignore = "moved to standalone"] #[test] fn test_sha256_stdin_binary() { let ts = TestScenario::new(util_name!()); @@ -1182,8 +1217,8 @@ fn test_sha256_stdin_binary() { } // This test is currently disabled on windows +#[ignore = "moved to standalone"] #[test] -#[cfg_attr(windows, ignore = "Discussion is in #9168")] fn test_check_sha256_binary() { new_ucmd!() .args(&["--sha256", "--check", "binary.sha256.checkfile"]) @@ -1198,28 +1233,28 @@ fn test_help_shows_correct_utility_name() { let scene = TestScenario::new(util_name!()); // Test md5sum - scene - .ccmd("md5sum") - .arg("--help") - .succeeds() - .stdout_contains("Usage: md5sum") - .stdout_does_not_contain("Usage: hashsum"); + // scene + // .ccmd("md5sum") + // .arg("--help") + // .succeeds() + // .stdout_contains("Usage: md5sum") + // .stdout_does_not_contain("Usage: hashsum"); // Test sha256sum - scene - .ccmd("sha256sum") - .arg("--help") - .succeeds() - .stdout_contains("Usage: sha256sum") - .stdout_does_not_contain("Usage: hashsum"); + // scene + // .ccmd("sha256sum") + // .arg("--help") + // .succeeds() + // .stdout_contains("Usage: sha256sum") + // .stdout_does_not_contain("Usage: hashsum"); // Test b2sum - scene - .ccmd("b2sum") - .arg("--help") - .succeeds() - .stdout_contains("Usage: b2sum") - .stdout_does_not_contain("Usage: hashsum"); + // scene + // .ccmd("b2sum") + // .arg("--help") + // .succeeds() + // .stdout_contains("Usage: b2sum") + // .stdout_does_not_contain("Usage: hashsum"); // Test that generic hashsum still shows the correct usage scene diff --git a/tests/by-util/test_md5sum.rs b/tests/by-util/test_md5sum.rs new file mode 100644 index 00000000000..6ccf173cf15 --- /dev/null +++ b/tests/by-util/test_md5sum.rs @@ -0,0 +1,812 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use uutests::new_ucmd; +use uutests::util::TestScenario; +use uutests::util_name; +// spell-checker:ignore checkfile, testf, ntestf +macro_rules! get_hash( + ($str:expr) => ( + $str.split(' ').collect::>()[0] + ); +); + +macro_rules! test_digest { + ($id:ident) => { + mod $id { + use uutests::util::*; + use uutests::util_name; + static EXPECTED_FILE: &'static str = concat!(stringify!($id), ".expected"); + static CHECK_FILE: &'static str = concat!(stringify!($id), ".checkfile"); + static INPUT_FILE: &'static str = "input.txt"; + + #[test] + fn test_single_file() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_stdin() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .pipe_in_fixture(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_check() { + let ts = TestScenario::new(util_name!()); + println!("File content='{}'", ts.fixtures.read(INPUT_FILE)); + println!("Check file='{}'", ts.fixtures.read(CHECK_FILE)); + + ts.ucmd() + .args(&["--check", CHECK_FILE]) + .succeeds() + .no_stderr() + .stdout_is("input.txt: OK\n"); + } + + #[test] + fn test_zero() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg("--zero") + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_missing_file() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.write("a", "file1\n"); + at.write("c", "file3\n"); + + ts.ucmd() + .args(&["a", "b", "c"]) + .fails() + .stdout_contains("a\n") + .stdout_contains("c\n") + .stderr_contains("b: No such file or directory"); + } + } + }; +} + +test_digest! {md5} + +#[test] +fn test_check_md5_ignore_missing() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("testf", "foobar\n"); + at.write( + "testf.sha1", + "14758f1afd44c09b7992073ccf00b43d testf\n14758f1afd44c09b7992073ccf00b43d testf2\n", + ); + scene + .ccmd("md5sum") + .arg("-c") + .arg(at.subdir.join("testf.sha1")) + .fails() + .stdout_contains("testf2: FAILED open or read"); + + scene + .ccmd("md5sum") + .arg("-c") + .arg("--ignore-missing") + .arg(at.subdir.join("testf.sha1")) + .succeeds() + .stdout_is("testf: OK\n") + .stderr_is(""); + + scene + .ccmd("md5sum") + .arg("--ignore-missing") + .arg(at.subdir.join("testf.sha1")) + .fails() + .stderr_contains( + "md5sum: the --ignore-missing option is meaningful only when verifying checksums", + ); +} + +// Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace. +// ref: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions +#[test] +fn test_check_md5sum() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + #[cfg(not(windows))] + { + for f in &["a", " b", "*c", "dd", " "] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "60b725f10c9c85c70d97880dfe8191b3 a\n\ + bf35d7536c785cf06730d5a40301eba2 b\n\ + f5b61709718c1ecf8db1aea8547d4698 *c\n\ + b064a020db8018f18ff5ae367d01b212 dd\n\ + d784fa8b6d98d27699781bd9a7cf19f0 ", + ); + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .succeeds() + .stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n") + .stderr_is(""); + } + #[cfg(windows)] + { + for f in &["a", " b", "dd"] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "60b725f10c9c85c70d97880dfe8191b3 a\n\ + bf35d7536c785cf06730d5a40301eba2 b\n\ + b064a020db8018f18ff5ae367d01b212 dd", + ); + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .succeeds() + .stdout_is("a: OK\n b: OK\ndd: OK\n") + .stderr_is(""); + } +} + +// GNU also supports one line sep +#[test] +fn test_check_md5sum_only_one_space() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + for f in ["a", " b", "c"] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "60b725f10c9c85c70d97880dfe8191b3 a\n\ + bf35d7536c785cf06730d5a40301eba2 b\n\ + 2cd6ee2c70b0bde53fbe6cac3c8b8bb1 c\n", + ); + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .succeeds() + .stdout_only("a: OK\n b: OK\nc: OK\n"); +} + +#[test] +fn test_check_md5sum_reverse_bsd() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + #[cfg(not(windows))] + { + for f in &["a", " b", "*c", "dd", " "] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "60b725f10c9c85c70d97880dfe8191b3 a\n\ + bf35d7536c785cf06730d5a40301eba2 b\n\ + f5b61709718c1ecf8db1aea8547d4698 *c\n\ + b064a020db8018f18ff5ae367d01b212 dd\n\ + d784fa8b6d98d27699781bd9a7cf19f0 ", + ); + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .succeeds() + .stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n") + .stderr_is(""); + } + #[cfg(windows)] + { + for f in &["a", " b", "dd"] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "60b725f10c9c85c70d97880dfe8191b3 a\n\ + bf35d7536c785cf06730d5a40301eba2 b\n\ + b064a020db8018f18ff5ae367d01b212 dd", + ); + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .succeeds() + .stdout_is("a: OK\n b: OK\ndd: OK\n") + .stderr_is(""); + } +} + +#[test] +fn test_check_md5sum_mixed_format() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + #[cfg(not(windows))] + { + for f in &[" b", "*c", "dd", " "] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "bf35d7536c785cf06730d5a40301eba2 b\n\ + f5b61709718c1ecf8db1aea8547d4698 *c\n\ + b064a020db8018f18ff5ae367d01b212 dd\n\ + d784fa8b6d98d27699781bd9a7cf19f0 ", + ); + } + #[cfg(windows)] + { + for f in &[" b", "dd"] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "bf35d7536c785cf06730d5a40301eba2 b\n\ + b064a020db8018f18ff5ae367d01b212 dd", + ); + } + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .fails_with_code(1); +} + +#[test] +fn test_invalid_arg() { + new_ucmd!().arg("--definitely-invalid").fails_with_code(1); +} + +#[test] +fn test_conflicting_arg() { + new_ucmd!().arg("--tag").arg("--check").fails_with_code(1); + new_ucmd!().arg("--tag").arg("--text").fails_with_code(1); +} + +#[test] +#[cfg_attr(windows, ignore = "Disabled on windows")] +fn test_with_escape_filename() { + let scene = TestScenario::new(util_name!()); + + let at = &scene.fixtures; + let filename = "a\nb"; + at.touch(filename); + let result = scene.ccmd("md5sum").arg("--text").arg(filename).succeeds(); + let stdout = result.stdout_str(); + println!("stdout {stdout}"); + assert!(stdout.starts_with('\\')); + assert!(stdout.trim().ends_with("a\\nb")); +} + +#[test] +#[cfg_attr(windows, ignore = "Disabled on windows")] +fn test_with_escape_filename_zero_text() { + let scene = TestScenario::new(util_name!()); + + let at = &scene.fixtures; + let filename = "a\nb"; + at.touch(filename); + let result = scene + .ccmd("md5sum") + .arg("--text") + .arg("--zero") + .arg(filename) + .succeeds(); + let stdout = result.stdout_str(); + println!("stdout {stdout}"); + assert!(!stdout.starts_with('\\')); + assert!(stdout.contains("a\nb")); +} + +#[test] +fn test_check_empty_line() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write( + "in.md5", + "d41d8cd98f00b204e9800998ecf8427e f\n\nd41d8cd98f00b204e9800998ecf8427e f\ninvalid\n\n", + ); + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .succeeds() + .stderr_contains("WARNING: 1 line is improperly formatted"); +} + +#[test] +#[cfg_attr(windows, ignore = "Disabled on windows")] +fn test_check_with_escape_filename() { + let scene = TestScenario::new(util_name!()); + + let at = &scene.fixtures; + + let filename = "a\nb"; + at.touch(filename); + let result = scene.ccmd("md5sum").arg("--tag").arg(filename).succeeds(); + let stdout = result.stdout_str(); + println!("stdout {stdout}"); + assert!(stdout.starts_with("\\MD5")); + assert!(stdout.contains("a\\nb")); + at.write("check.md5", stdout); + let result = scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5") + .succeeds(); + result.stdout_is("\\a\\nb: OK\n"); +} + +#[test] +fn test_check_strict_error() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write( + "in.md5", + "ERR\nERR\nd41d8cd98f00b204e9800998ecf8427e f\nERR\n", + ); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--strict") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_contains("WARNING: 3 lines are improperly formatted"); +} + +#[test] +fn test_check_warn() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write( + "in.md5", + "d41d8cd98f00b204e9800998ecf8427e f\nd41d8cd98f00b204e9800998ecf8427e f\ninvalid\n", + ); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--warn") + .arg(at.subdir.join("in.md5")) + .succeeds() + .stderr_contains("in.md5: 3: improperly formatted MD5 checksum line") + .stderr_contains("WARNING: 1 line is improperly formatted"); + + // with strict, we should fail the execution + scene + .ccmd("md5sum") + .arg("--check") + .arg("--strict") + .arg(at.subdir.join("in.md5")) + .fails(); +} + +#[test] +fn test_check_status() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427f\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--status") + .arg(at.subdir.join("in.md5")) + .fails() + .no_output(); +} + +#[test] +fn test_check_status_code() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f f\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--status") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_is("") + .stdout_is(""); +} + +#[test] +fn test_sha1_with_md5sum_should_fail() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write("f.sha1", "SHA1 (f) = d41d8cd98f00b204e9800998ecf8427e\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("f.sha1")) + .fails() + .stderr_contains("f.sha1: no properly formatted checksum lines found") + .stderr_does_not_contain("WARNING: 1 line is improperly formatted"); +} + +#[test] +// Disabled on Windows because of the "*" +#[cfg_attr(windows, ignore = "Disabled on windows")] +fn test_check_one_two_space_star() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("empty"); + + // with one space, the "*" is removed + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427e *empty\n"); + + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .succeeds() + .stdout_is("empty: OK\n"); + + // with two spaces, the "*" is not removed + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427e *empty\n"); + // First should fail as *empty doesn't exit + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .fails() + .stdout_is("*empty: FAILED open or read\n"); + + at.touch("*empty"); + // Should pass as we have the file + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .succeeds() + .stdout_is("*empty: OK\n"); +} + +#[test] +// Disabled on Windows because of the "*" +#[cfg_attr(windows, ignore = "Disabled on windows")] +fn test_check_space_star_or_not() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("a"); + at.touch("*c"); + + // with one space, the "*" is removed + at.write( + "in.md5", + "d41d8cd98f00b204e9800998ecf8427e *c\n + d41d8cd98f00b204e9800998ecf8427e a\n", + ); + + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .fails() + .stdout_contains("c: FAILED") + .stdout_does_not_contain("a: FAILED") + .stderr_contains("WARNING: 1 line is improperly formatted"); + + at.write( + "in.md5", + "d41d8cd98f00b204e9800998ecf8427e a\n + d41d8cd98f00b204e9800998ecf8427e *c\n", + ); + + // First should fail as *empty doesn't exit + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .succeeds() + .stdout_contains("a: OK") + .stderr_contains("WARNING: 1 line is improperly formatted"); +} + +#[test] +fn test_check_no_backslash_no_space() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427e\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .succeeds() + .stdout_is("f: OK\n"); +} + +#[test] +fn test_incomplete_format() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write("in.md5", "MD5 (\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_contains("no properly formatted checksum lines found"); +} + +#[test] +fn test_start_error() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write("in.md5", "ERR\nd41d8cd98f00b204e9800998ecf8427e f\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--strict") + .arg(at.subdir.join("in.md5")) + .fails() + .stdout_is("f: OK\n") + .stderr_contains("WARNING: 1 line is improperly formatted"); +} + +#[test] +fn test_check_check_ignore_no_file() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f missing\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg("--ignore-missing") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_contains("in.md5: no file was verified"); +} + +#[test] +fn test_check_directory_error() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.mkdir("d"); + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f d\n"); + #[cfg(not(windows))] + let err_msg = "md5sum: d: Is a directory\n"; + #[cfg(windows)] + let err_msg = "md5sum: d: Permission denied\n"; + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_contains(err_msg); +} + +#[test] +#[cfg(not(windows))] +fn test_continue_after_directory_error() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.mkdir("d"); + at.touch("file"); + at.touch("no_read_perms"); + at.set_mode("no_read_perms", 200); + + let (out, err_msg) = ( + "d41d8cd98f00b204e9800998ecf8427e file\n", + [ + "md5sum: d: Is a directory", + "md5sum: dne: No such file or directory", + "md5sum: no_read_perms: Permission denied\n", + ] + .join("\n"), + ); + + scene + .ccmd("md5sum") + .arg("d") + .arg("dne") + .arg("no_read_perms") + .arg("file") + .fails() + .stdout_is(out) + .stderr_is(err_msg); +} + +#[test] +fn test_check_quiet() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427e f\n"); + scene + .ccmd("md5sum") + .arg("--quiet") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .succeeds() + .no_output(); + + // incorrect md5 + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f f\n"); + scene + .ccmd("md5sum") + .arg("--quiet") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .fails() + .stdout_contains("f: FAILED") + .stderr_contains("WARNING: 1 computed checksum did NOT match"); + + scene + .ccmd("md5sum") + .arg("--quiet") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_contains("md5sum: the --quiet option is meaningful only when verifying checksums"); + scene + .ccmd("md5sum") + .arg("--strict") + .arg(at.subdir.join("in.md5")) + .fails() + .stderr_contains("md5sum: the --strict option is meaningful only when verifying checksums"); +} + +#[test] +fn test_star_to_start() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.touch("f"); + at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427e *f\n"); + scene + .ccmd("md5sum") + .arg("--check") + .arg(at.subdir.join("in.md5")) + .succeeds() + .stdout_only("f: OK\n"); +} + +#[test] +fn test_check_md5_comment_line() { + // A comment in a checksum file shall be discarded unnoticed. + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("foo", "foo-content\n"); + at.write( + "MD5SUM", + "\ + # This is a comment\n\ + 8411029f3f5b781026a93db636aca721 foo\n\ + # next comment is empty\n#", + ); + + scene + .ccmd("md5sum") + .arg("--check") + .arg("MD5SUM") + .succeeds() + .stdout_contains("foo: OK") + .no_stderr(); +} + +#[test] +fn test_check_md5_comment_only() { + // A file only filled with comments is equivalent to an empty file, + // and therefore produces an error. + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("foo", "foo-content\n"); + at.write("MD5SUM", "# This is a comment\n"); + + scene + .ccmd("md5sum") + .arg("--check") + .arg("MD5SUM") + .fails() + .stderr_contains("no properly formatted checksum lines found"); +} + +#[test] +fn test_check_md5_comment_leading_space() { + // A file only filled with comments is equivalent to an empty file, + // and therefore produces an error. + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("foo", "foo-content\n"); + at.write( + "MD5SUM", + " # This is a comment\n\ + 8411029f3f5b781026a93db636aca721 foo\n", + ); + + scene + .ccmd("md5sum") + .arg("--check") + .arg("MD5SUM") + .succeeds() + .stdout_contains("foo: OK") + .stderr_contains("WARNING: 1 line is improperly formatted"); +} + +#[test] +fn test_help_shows_correct_utility_name() { + // Test md5sum + new_ucmd!() + .arg("--help") + .succeeds() + .stdout_contains("Usage: md5sum") + .stdout_does_not_contain("Usage: hashsum"); +} diff --git a/tests/by-util/test_mkdir.rs b/tests/by-util/test_mkdir.rs index 5d68fadfd10..0756cb5d645 100644 --- a/tests/by-util/test_mkdir.rs +++ b/tests/by-util/test_mkdir.rs @@ -908,6 +908,33 @@ fn test_mkdir_parent_mode_with_explicit_mode() { ); } +/// Test that nested directories inherit the setgid bit with mkdir -p. +#[test] +#[cfg(target_os = "linux")] +fn test_mkdir_parent_inherits_setgid() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir("parent"); + at.set_mode("parent", 0o2755); + + ucmd.arg("-p") + .arg("parent/child/grandchild") + .succeeds() + .no_stderr() + .no_stdout(); + + // All descendants should inherit the setgid bit (0o2000) + assert_eq!(at.metadata("parent").permissions().mode() & 0o2000, 0o2000); + assert_eq!( + at.metadata("parent/child").permissions().mode() & 0o2000, + 0o2000 + ); + assert_eq!( + at.metadata("parent/child/grandchild").permissions().mode() & 0o2000, + 0o2000 + ); +} + #[test] fn test_mkdir_concurrent_creation() { // Test concurrent mkdir -p operations: 10 iterations, 8 threads, 40 levels nesting diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index 5592f9c1e5c..d756cea7d7e 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -626,11 +626,17 @@ fn test_mv_symlink_into_target() { #[cfg(target_os = "linux")] #[test] fn test_mv_broken_symlink_to_another_fs() { + use tempfile::TempDir; + let scene = TestScenario::new(util_name!()); scene.fixtures.mkdir("foo"); scene.fixtures.symlink_file("missing", "foo/dangling"); - let dest = "/dev/shm/foo"; + + let other_fs_tempdir = + TempDir::new_in("/dev/shm/").expect("Unable to create temp directory in /dev/shm"); + let dest = other_fs_tempdir.path().join("foo"); + scene .ucmd() .arg("foo") diff --git a/tests/by-util/test_rm.rs b/tests/by-util/test_rm.rs index 20d4a935714..e262e9612b1 100644 --- a/tests/by-util/test_rm.rs +++ b/tests/by-util/test_rm.rs @@ -767,12 +767,22 @@ fn test_current_or_parent_dir_rm4() { at.mkdir("d"); + let file_1 = "file1"; + let file_2 = "d/file2"; + + at.touch(file_1); + at.touch(file_2); + let answers = [ "rm: refusing to remove '.' or '..' directory: skipping 'd/.'", "rm: refusing to remove '.' or '..' directory: skipping 'd/./'", "rm: refusing to remove '.' or '..' directory: skipping 'd/./'", "rm: refusing to remove '.' or '..' directory: skipping 'd/..'", "rm: refusing to remove '.' or '..' directory: skipping 'd/../'", + "rm: refusing to remove '.' or '..' directory: skipping '.'", + "rm: refusing to remove '.' or '..' directory: skipping './'", + "rm: refusing to remove '.' or '..' directory: skipping '../'", + "rm: refusing to remove '.' or '..' directory: skipping '..'", ]; let std_err_str = ts .ucmd() @@ -782,12 +792,20 @@ fn test_current_or_parent_dir_rm4() { .arg("d/.////") .arg("d/..") .arg("d/../") + .arg(".") + .arg("./") + .arg("../") + .arg("..") .fails() .stderr_move_str(); for (idx, line) in std_err_str.lines().enumerate() { assert_eq!(line, answers[idx]); } + // checks that no file was silently removed + assert!(at.dir_exists("d")); + assert!(at.file_exists(file_1)); + assert!(at.file_exists(file_2)); } #[test] @@ -798,12 +816,22 @@ fn test_current_or_parent_dir_rm4_windows() { at.mkdir("d"); + let file_1 = "file1"; + let file_2 = "d/file2"; + + at.touch(file_1); + at.touch(file_2); + let answers = [ "rm: refusing to remove '.' or '..' directory: skipping 'd\\.'", "rm: refusing to remove '.' or '..' directory: skipping 'd\\.\\'", "rm: refusing to remove '.' or '..' directory: skipping 'd\\.\\'", "rm: refusing to remove '.' or '..' directory: skipping 'd\\..'", "rm: refusing to remove '.' or '..' directory: skipping 'd\\..\\'", + "rm: refusing to remove '.' or '..' directory: skipping '.'", + "rm: refusing to remove '.' or '..' directory: skipping '.\\'", + "rm: refusing to remove '.' or '..' directory: skipping '..'", + "rm: refusing to remove '.' or '..' directory: skipping '..\\'", ]; let std_err_str = ts .ucmd() @@ -813,12 +841,21 @@ fn test_current_or_parent_dir_rm4_windows() { .arg("d\\.\\\\\\\\") .arg("d\\..") .arg("d\\..\\") + .arg(".") + .arg(".\\") + .arg("..") + .arg("..\\") .fails() .stderr_move_str(); for (idx, line) in std_err_str.lines().enumerate() { assert_eq!(line, answers[idx]); } + + // checks that no file was silently removed + assert!(at.dir_exists("d")); + assert!(at.file_exists(file_1)); + assert!(at.file_exists(file_2)); } #[test] diff --git a/tests/by-util/test_sha1sum.rs b/tests/by-util/test_sha1sum.rs new file mode 100644 index 00000000000..d0e7f6f3d71 --- /dev/null +++ b/tests/by-util/test_sha1sum.rs @@ -0,0 +1,165 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use uutests::new_ucmd; +use uutests::util::TestScenario; +use uutests::util_name; +// spell-checker:ignore checkfile, testf, ntestf +macro_rules! get_hash( + ($str:expr) => ( + $str.split(' ').collect::>()[0] + ); +); + +macro_rules! test_digest { + ($id:ident) => { + mod $id { + use uutests::util::*; + use uutests::util_name; + static EXPECTED_FILE: &'static str = concat!(stringify!($id), ".expected"); + static CHECK_FILE: &'static str = concat!(stringify!($id), ".checkfile"); + static INPUT_FILE: &'static str = "input.txt"; + + #[test] + fn test_single_file() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_stdin() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .pipe_in_fixture(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_check() { + let ts = TestScenario::new(util_name!()); + println!("File content='{}'", ts.fixtures.read(INPUT_FILE)); + println!("Check file='{}'", ts.fixtures.read(CHECK_FILE)); + + ts.ucmd() + .args(&["--check", CHECK_FILE]) + .succeeds() + .no_stderr() + .stdout_is("input.txt: OK\n"); + } + + #[test] + fn test_zero() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg("--zero") + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_missing_file() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.write("a", "file1\n"); + at.write("c", "file3\n"); + + ts.ucmd() + .args(&["a", "b", "c"]) + .fails() + .stdout_contains("a\n") + .stdout_contains("c\n") + .stderr_contains("b: No such file or directory"); + } + } + }; +} + +test_digest! {sha1} + +#[test] +fn test_check_sha1() { + // To make sure that #3815 doesn't happen again + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("testf", "foobar\n"); + at.write( + "testf.sha1", + "988881adc9fc3655077dc2d4d757d480b5ea0e11 testf\n", + ); + scene + .ccmd("sha1sum") + .arg("-c") + .arg(at.subdir.join("testf.sha1")) + .succeeds() + .stdout_is("testf: OK\n") + .stderr_is(""); +} + +#[test] +fn test_check_file_not_found_warning() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("testf", "foobar\n"); + at.write( + "testf.sha1", + "988881adc9fc3655077dc2d4d757d480b5ea0e11 testf\n", + ); + at.remove("testf"); + scene + .ccmd("sha1sum") + .arg("-c") + .arg(at.subdir.join("testf.sha1")) + .fails() + .stdout_is("testf: FAILED open or read\n") + .stderr_is("sha1sum: testf: No such file or directory\nsha1sum: WARNING: 1 listed file could not be read\n"); +} + +#[test] +fn test_invalid_arg() { + new_ucmd!().arg("--definitely-invalid").fails_with_code(1); +} + +#[test] +fn test_conflicting_arg() { + new_ucmd!().arg("--tag").arg("--check").fails_with_code(1); + new_ucmd!().arg("--tag").arg("--text").fails_with_code(1); +} + +#[test] +fn test_help_shows_correct_utility_name() { + // Test that help output shows the actual utility name instead of "hashsum" + + new_ucmd!() + .arg("--help") + .succeeds() + .stdout_contains("Usage: sha1sum") + .stdout_does_not_contain("Usage: hashsum"); +} diff --git a/tests/by-util/test_sha224sum.rs b/tests/by-util/test_sha224sum.rs new file mode 100644 index 00000000000..e2b7129b8a5 --- /dev/null +++ b/tests/by-util/test_sha224sum.rs @@ -0,0 +1,120 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use uutests::new_ucmd; +// spell-checker:ignore checkfile, testf, ntestf +macro_rules! get_hash( + ($str:expr) => ( + $str.split(' ').collect::>()[0] + ); +); + +macro_rules! test_digest { + ($id:ident) => { + mod $id { + use uutests::util::*; + use uutests::util_name; + static EXPECTED_FILE: &'static str = concat!(stringify!($id), ".expected"); + static CHECK_FILE: &'static str = concat!(stringify!($id), ".checkfile"); + static INPUT_FILE: &'static str = "input.txt"; + + #[test] + fn test_single_file() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_stdin() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .pipe_in_fixture(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_check() { + let ts = TestScenario::new(util_name!()); + println!("File content='{}'", ts.fixtures.read(INPUT_FILE)); + println!("Check file='{}'", ts.fixtures.read(CHECK_FILE)); + + ts.ucmd() + .args(&["--check", CHECK_FILE]) + .succeeds() + .no_stderr() + .stdout_is("input.txt: OK\n"); + } + + #[test] + fn test_zero() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg("--zero") + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_missing_file() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.write("a", "file1\n"); + at.write("c", "file3\n"); + + ts.ucmd() + .args(&["a", "b", "c"]) + .fails() + .stdout_contains("a\n") + .stdout_contains("c\n") + .stderr_contains("b: No such file or directory"); + } + } + }; +} +test_digest! {sha224} + +#[test] +fn test_invalid_arg() { + new_ucmd!().arg("--definitely-invalid").fails_with_code(1); +} + +#[test] +fn test_conflicting_arg() { + new_ucmd!().arg("--tag").arg("--check").fails_with_code(1); +} + +#[test] +fn test_help_shows_correct_utility_name() { + // Test that help output shows the actual utility name instead of "hashsum" + new_ucmd!() + .arg("--help") + .succeeds() + .stdout_contains("Usage: sha224sum") + .stdout_does_not_contain("Usage: hashsum"); +} diff --git a/tests/by-util/test_sha256sum.rs b/tests/by-util/test_sha256sum.rs new file mode 100644 index 00000000000..b3b538384b1 --- /dev/null +++ b/tests/by-util/test_sha256sum.rs @@ -0,0 +1,181 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use uutests::new_ucmd; +use uutests::util::TestScenario; +use uutests::util_name; +// spell-checker:ignore checkfile, testf, ntestf +macro_rules! get_hash( + ($str:expr) => ( + $str.split(' ').collect::>()[0] + ); +); + +macro_rules! test_digest { + ($id:ident) => { + mod $id { + use uutests::util::*; + use uutests::util_name; + static EXPECTED_FILE: &'static str = concat!(stringify!($id), ".expected"); + static CHECK_FILE: &'static str = concat!(stringify!($id), ".checkfile"); + static INPUT_FILE: &'static str = "input.txt"; + + #[test] + fn test_single_file() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_stdin() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .pipe_in_fixture(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_check() { + let ts = TestScenario::new(util_name!()); + println!("File content='{}'", ts.fixtures.read(INPUT_FILE)); + println!("Check file='{}'", ts.fixtures.read(CHECK_FILE)); + + ts.ucmd() + .args(&["--check", CHECK_FILE]) + .succeeds() + .no_stderr() + .stdout_is("input.txt: OK\n"); + } + + #[test] + fn test_zero() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg("--zero") + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_missing_file() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.write("a", "file1\n"); + at.write("c", "file3\n"); + + ts.ucmd() + .args(&["a", "b", "c"]) + .fails() + .stdout_contains("a\n") + .stdout_contains("c\n") + .stderr_contains("b: No such file or directory"); + } + } + }; +} + +test_digest! {sha256} + +#[test] +fn test_invalid_arg() { + new_ucmd!().arg("--definitely-invalid").fails_with_code(1); +} + +#[test] +fn test_conflicting_arg() { + new_ucmd!().arg("--tag").arg("--check").fails_with_code(1); + new_ucmd!().arg("--tag").arg("--text").fails_with_code(1); +} + +#[test] +fn test_tag() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("foobar", "foo bar\n"); + scene + .ccmd("sha256sum") + .arg("--tag") + .arg("foobar") + .succeeds() + .stdout_is( + "SHA256 (foobar) = 1f2ec52b774368781bed1d1fb140a92e0eb6348090619c9291f9a5a3c8e8d151\n", + ); +} + +#[test] +fn test_sha256_binary() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read("binary.sha256.expected"), + get_hash!( + ts.ucmd() + .arg("binary.png") + .succeeds() + .no_stderr() + .stdout_str() + ) + ); +} + +#[test] +fn test_sha256_stdin_binary() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read("binary.sha256.expected"), + get_hash!( + ts.ucmd() + .pipe_in_fixture("binary.png") + .succeeds() + .no_stderr() + .stdout_str() + ) + ); +} + +// This test is currently disabled on windows +#[test] +#[cfg_attr(windows, ignore = "Discussion is in #9168")] +fn test_check_sha256_binary() { + new_ucmd!() + .args(&["--check", "binary.sha256.checkfile"]) + .succeeds() + .no_stderr() + .stdout_is("binary.png: OK\n"); +} + +#[test] +fn test_help_shows_correct_utility_name() { + // Test that help output shows the actual utility name instead of "hashsum" + new_ucmd!() + .arg("--help") + .succeeds() + .stdout_contains("Usage: sha256sum") + .stdout_does_not_contain("Usage: hashsum"); +} diff --git a/tests/by-util/test_sha384sum.rs b/tests/by-util/test_sha384sum.rs new file mode 100644 index 00000000000..9dbc730801c --- /dev/null +++ b/tests/by-util/test_sha384sum.rs @@ -0,0 +1,122 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use uutests::new_ucmd; +// spell-checker:ignore checkfile, testf, ntestf +macro_rules! get_hash( + ($str:expr) => ( + $str.split(' ').collect::>()[0] + ); +); + +macro_rules! test_digest { + ($id:ident) => { + mod $id { + use uutests::util::*; + use uutests::util_name; + static EXPECTED_FILE: &'static str = concat!(stringify!($id), ".expected"); + static CHECK_FILE: &'static str = concat!(stringify!($id), ".checkfile"); + static INPUT_FILE: &'static str = "input.txt"; + + #[test] + fn test_single_file() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_stdin() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .pipe_in_fixture(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_check() { + let ts = TestScenario::new(util_name!()); + println!("File content='{}'", ts.fixtures.read(INPUT_FILE)); + println!("Check file='{}'", ts.fixtures.read(CHECK_FILE)); + + ts.ucmd() + .args(&["--check", CHECK_FILE]) + .succeeds() + .no_stderr() + .stdout_is("input.txt: OK\n"); + } + + #[test] + fn test_zero() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg("--zero") + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_missing_file() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.write("a", "file1\n"); + at.write("c", "file3\n"); + + ts.ucmd() + .args(&["a", "b", "c"]) + .fails() + .stdout_contains("a\n") + .stdout_contains("c\n") + .stderr_contains("b: No such file or directory"); + } + } + }; +} + +test_digest! {sha384} + +#[test] +fn test_invalid_arg() { + new_ucmd!().arg("--definitely-invalid").fails_with_code(1); +} + +#[test] +fn test_conflicting_arg() { + new_ucmd!().arg("--tag").arg("--check").fails_with_code(1); + new_ucmd!().arg("--tag").arg("--text").fails_with_code(1); +} + +#[test] +fn test_help_shows_correct_utility_name() { + // Test that help output shows the actual utility name instead of "hashsum" + new_ucmd!() + .arg("--help") + .succeeds() + .stdout_contains("Usage: sha384sum") + .stdout_does_not_contain("Usage: hashsum"); +} diff --git a/tests/by-util/test_sha512sum.rs b/tests/by-util/test_sha512sum.rs new file mode 100644 index 00000000000..5e01ad32a18 --- /dev/null +++ b/tests/by-util/test_sha512sum.rs @@ -0,0 +1,122 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use uutests::new_ucmd; +// spell-checker:ignore checkfile, testf, ntestf +macro_rules! get_hash( + ($str:expr) => ( + $str.split(' ').collect::>()[0] + ); +); + +macro_rules! test_digest { + ($id:ident) => { + mod $id { + use uutests::util::*; + use uutests::util_name; + static EXPECTED_FILE: &'static str = concat!(stringify!($id), ".expected"); + static CHECK_FILE: &'static str = concat!(stringify!($id), ".checkfile"); + static INPUT_FILE: &'static str = "input.txt"; + + #[test] + fn test_single_file() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_stdin() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .pipe_in_fixture(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_check() { + let ts = TestScenario::new(util_name!()); + println!("File content='{}'", ts.fixtures.read(INPUT_FILE)); + println!("Check file='{}'", ts.fixtures.read(CHECK_FILE)); + + ts.ucmd() + .args(&["--check", CHECK_FILE]) + .succeeds() + .no_stderr() + .stdout_is("input.txt: OK\n"); + } + + #[test] + fn test_zero() { + let ts = TestScenario::new(util_name!()); + assert_eq!( + ts.fixtures.read(EXPECTED_FILE), + get_hash!( + ts.ucmd() + .arg("--zero") + .arg(INPUT_FILE) + .succeeds() + .no_stderr() + .stdout_str() + ) + ); + } + + #[test] + fn test_missing_file() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.write("a", "file1\n"); + at.write("c", "file3\n"); + + ts.ucmd() + .args(&["a", "b", "c"]) + .fails() + .stdout_contains("a\n") + .stdout_contains("c\n") + .stderr_contains("b: No such file or directory"); + } + } + }; +} + +test_digest! {sha512} + +#[test] +fn test_invalid_arg() { + new_ucmd!().arg("--definitely-invalid").fails_with_code(1); +} + +#[test] +fn test_conflicting_arg() { + new_ucmd!().arg("--tag").arg("--check").fails_with_code(1); + new_ucmd!().arg("--tag").arg("--text").fails_with_code(1); +} + +#[test] +fn test_help_shows_correct_utility_name() { + // Test that help output shows the actual utility name instead of "hashsum" + new_ucmd!() + .arg("--help") + .succeeds() + .stdout_contains("Usage: sha512sum") + .stdout_does_not_contain("Usage: hashsum"); +} diff --git a/tests/by-util/test_shuf.rs b/tests/by-util/test_shuf.rs index 4d3f841ace9..948b3ed0756 100644 --- a/tests/by-util/test_shuf.rs +++ b/tests/by-util/test_shuf.rs @@ -4,6 +4,8 @@ // file that was distributed with this source code. // spell-checker:ignore (ToDO) unwritable +use std::fmt::Write; + use uutests::at_and_ucmd; use uutests::new_ucmd; @@ -847,3 +849,266 @@ fn test_range_repeat_empty_minus_one() { .no_stdout() .stderr_contains("invalid value '5-3' for '--input-range ': start exceeds end\n"); } + +// This test fails if we forget to flush the `BufWriter`. +#[test] +#[cfg(target_os = "linux")] +fn write_errors_are_reported() { + new_ucmd!() + .arg("-i1-10") + .arg("-o/dev/full") + .fails() + .no_stdout() + .stderr_is("shuf: write failed: No space left on device\n"); +} + +// On 32-bit platforms, if we cast carelessly, this will give no output. +#[test] +fn test_head_count_does_not_overflow_file() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.append("input.txt", "hello\n"); + + ucmd.arg(format!("-n{}", u64::from(u32::MAX) + 1)) + .arg("input.txt") + .succeeds() + .stdout_is("hello\n") + .no_stderr(); +} + +#[test] +fn test_head_count_does_not_overflow_args() { + new_ucmd!() + .arg(format!("-n{}", u64::from(u32::MAX) + 1)) + .arg("-e") + .arg("goodbye") + .succeeds() + .stdout_is("goodbye\n") + .no_stderr(); +} + +#[test] +fn test_head_count_does_not_overflow_range() { + new_ucmd!() + .arg(format!("-n{}", u64::from(u32::MAX) + 1)) + .arg("-i1-1") + .succeeds() + .stdout_is("1\n") + .no_stderr(); +} + +// Test reproducibility and compatibility of --random-source. +// These hard-coded results match those of GNU shuf. They should not be changed. + +#[test] +fn test_gnu_compat_range_repeat() { + let (at, mut ucmd) = at_and_ucmd!(); + at.append_bytes( + "random_bytes.bin", + b"\xfb\x83\x8f\x21\x9b\x3c\x2d\xc5\x73\xa5\x58\x6c\x54\x2f\x59\xf8", + ); + + ucmd.arg("--random-source=random_bytes.bin") + .arg("-r") + .arg("-i1-99") + .fails_with_code(1) + .stderr_is("shuf: end of random source\n") + .stdout_is("38\n30\n10\n26\n23\n61\n46\n99\n75\n43\n10\n89\n10\n44\n24\n59\n22\n51\n"); +} + +#[test] +fn test_gnu_compat_args_no_repeat() { + let (at, mut ucmd) = at_and_ucmd!(); + at.append_bytes( + "random_bytes.bin", + b"\xd1\xfd\xb9\x9a\xf5\x81\x71\x42\xf9\x7a\x59\x79\xd4\x9c\x8c\x7d", + ); + + ucmd.arg("--random-source=random_bytes.bin") + .arg("-e") + .args(&["1", "2", "3", "4", "5", "6", "7"][..]) + .succeeds() + .no_stderr() + .stdout_is("7\n1\n2\n5\n3\n4\n6\n"); +} + +#[test] +fn test_gnu_compat_from_stdin() { + let (at, mut ucmd) = at_and_ucmd!(); + at.append_bytes( + "random_bytes.bin", + b"\xd1\xfd\xb9\x9a\xf5\x81\x71\x42\xf9\x7a\x59\x79\xd4\x9c\x8c\x7d", + ); + + at.append("input.txt", "1\n2\n3\n4\n5\n6\n7\n"); + + ucmd.arg("--random-source=random_bytes.bin") + .set_stdin(at.open("input.txt")) + .succeeds() + .no_stderr() + .stdout_is("7\n1\n2\n5\n3\n4\n6\n"); +} + +#[test] +fn test_gnu_compat_from_file() { + let (at, mut ucmd) = at_and_ucmd!(); + at.append_bytes( + "random_bytes.bin", + b"\xd1\xfd\xb9\x9a\xf5\x81\x71\x42\xf9\x7a\x59\x79\xd4\x9c\x8c\x7d", + ); + + at.append("input.txt", "1\n2\n3\n4\n5\n6\n7\n"); + + ucmd.arg("--random-source=random_bytes.bin") + .arg("input.txt") + .succeeds() + .no_stderr() + .stdout_is("7\n1\n2\n5\n3\n4\n6\n"); +} + +#[test] +fn test_gnu_compat_limited_from_file() { + let (at, mut ucmd) = at_and_ucmd!(); + at.append_bytes( + "random_bytes.bin", + b"\xd1\xfd\xb9\x9a\xf5\x81\x71\x42\xf9\x7a\x59\x79\xd4\x9c\x8c\x7d", + ); + + at.append("input.txt", "1\n2\n3\n4\n5\n6\n7\n"); + + ucmd.arg("--random-source=random_bytes.bin") + .arg("-n5") + .arg("input.txt") + .succeeds() + .no_stderr() + .stdout_is("7\n1\n2\n5\n3\n"); +} + +// This specific case causes GNU to give different results than other modes. +#[ignore = "disabled until fixed"] +#[test] +fn test_gnu_compat_limited_from_stdin() { + let (at, mut ucmd) = at_and_ucmd!(); + at.append_bytes( + "random_bytes.bin", + b"\xd1\xfd\xb9\x9a\xf5\x81\x71\x42\xf9\x7a\x59\x79\xd4\x9c\x8c\x7d", + ); + + at.append("input.txt", "1\n2\n3\n4\n5\n6\n7\n"); + + ucmd.arg("--random-source=random_bytes.bin") + .arg("-n7") + .set_stdin(at.open("input.txt")) + .succeeds() + .no_stderr() + .stdout_is("6\n5\n1\n3\n2\n7\n4\n"); +} + +#[test] +fn test_gnu_compat_range_no_repeat() { + let (at, mut ucmd) = at_and_ucmd!(); + at.append_bytes( + "random_bytes.bin", + b"\xd1\xfd\xb9\x9a\xf5\x81\x71\x42\xf9\x7a\x59\x79\xd4\x9c\x8c\x7d", + ); + + ucmd.arg("--random-source=random_bytes.bin") + .arg("-i1-10") + .succeeds() + .no_stderr() + .stdout_is("10\n2\n8\n7\n3\n9\n6\n5\n1\n4\n"); +} + +// Test reproducibility of --random-seed. +// These results are arbitrary but they should not change unless we choose to break compatibility. + +#[test] +fn test_seed_args_repeat() { + new_ucmd!() + .arg("--random-seed=🌱") + .arg("-e") + .arg("-r") + .arg("-n10") + .args(&["foo", "bar", "baz", "qux"]) + .succeeds() + .no_stderr() + .stdout_is("qux\nbar\nbaz\nfoo\nbaz\nqux\nqux\nfoo\nqux\nqux\n"); +} + +#[test] +fn test_seed_args_no_repeat() { + new_ucmd!() + .arg("--random-seed=🌱") + .arg("-e") + .args(&["foo", "bar", "baz", "qux"]) + .succeeds() + .no_stderr() + .stdout_is("qux\nbaz\nfoo\nbar\n"); +} + +#[test] +fn test_seed_range_repeat() { + new_ucmd!() + .arg("--random-seed=🦀") + .arg("-r") + .arg("-i1-99") + .arg("-n10") + .succeeds() + .no_stderr() + .stdout_is("60\n44\n38\n41\n63\n43\n31\n71\n46\n90\n"); +} + +#[test] +fn test_seed_range_no_repeat() { + let expected = "8\n9\n1\n5\n2\n6\n4\n3\n10\n7\n"; + + new_ucmd!() + .arg("--random-seed=12345") + .arg("-i1-10") + .succeeds() + .no_stderr() + .stdout_is(expected); + + // Piping from e.g. seq gives identical results. + new_ucmd!() + .arg("--random-seed=12345") + .pipe_in("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n") + .succeeds() + .no_stderr() + .stdout_is(expected); +} + +// Test a longer input to exercise some more code paths in the sparse representation. +#[test] +fn test_seed_long_range_no_repeat() { + let expected = "\ + 1\n3\n35\n37\n36\n45\n72\n17\n18\n40\n67\n74\n81\n77\n14\n90\n\ + 7\n12\n80\n54\n23\n61\n29\n41\n15\n56\n6\n32\n82\n76\n11\n2\n100\n\ + 50\n60\n97\n73\n79\n91\n89\n85\n86\n66\n70\n22\n55\n8\n83\n39\n27\n"; + + new_ucmd!() + .arg("--random-seed=67890") + .arg("-i1-100") + .arg("-n50") + .succeeds() + .no_stderr() + .stdout_is(expected); + + let mut test_input = String::new(); + for n in 1..=100 { + writeln!(&mut test_input, "{n}").unwrap(); + } + + new_ucmd!() + .arg("--random-seed=67890") + .pipe_in(test_input.as_bytes()) + .arg("-n50") + .succeeds() + .no_stderr() + .stdout_is(expected); +} + +#[test] +fn test_empty_range_no_repeat() { + new_ucmd!().arg("-i4-3").succeeds().no_stderr().no_stdout(); +} diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 0106d719fad..bc2092b8db2 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -208,6 +208,24 @@ fn test_version_sort_stable() { .stdout_is("0.1\n0.02\n0.2\n0.002\n0.3\n"); } +#[test] +fn test_ignore_case_orders_punctuation_after_letters() { + new_ucmd!() + .arg("-f") + .pipe_in("A\na\n_\n") + .succeeds() + .stdout_is("A\na\n_\n"); +} + +#[test] +fn test_ignore_case_unique_orders_punctuation_after_letters() { + new_ucmd!() + .arg("-fu") + .pipe_in("a\n_\n") + .succeeds() + .stdout_is("a\n_\n"); +} + #[test] fn test_human_numeric_whitespace() { test_helper( @@ -258,6 +276,14 @@ fn test_multiple_decimals_numeric() { ); } +#[test] +fn test_multiple_groupings_numeric() { + test_helper( + "multiple_groupings_numeric", + &["-n", "--numeric-sort", "--sort=numeric", "--sort=n"], + ); +} + #[test] fn test_numeric_with_trailing_invalid_chars() { test_helper( @@ -1450,6 +1476,16 @@ fn test_multiple_output_files() { .stderr_is("sort: multiple output files specified\n"); } +#[test] +// Test for GNU tests/sort/sort.pl "o3" +fn test_duplicate_output_files_allowed() { + new_ucmd!() + .args(&["-o", "foo", "-o", "foo"]) + .pipe_in("") + .succeeds() + .no_stderr(); +} + #[test] fn test_output_file_with_leading_dash() { let test_cases = [ @@ -2359,18 +2395,18 @@ _ __ 1 _ -2,5 -_ 2.4 ___ +2,5 +_ 2.,,3 __ 2.4 ___ -2,,3 -_ 2.4 ___ +2,,3 +_ 1a _ 2b @@ -2517,4 +2553,90 @@ fn test_locale_collation_utf8() { } } +#[test] +fn test_locale_interleaved_en_us_utf8() { + // Test case for issue: locale-based collation support + // In en_US.UTF-8, lowercase and uppercase letters should interleave + // Expected: a, A, b, B (locale-aware) + // Not: A, B, a, b (ASCII byte order) + new_ucmd!() + .env("LC_ALL", "en_US.UTF-8") + .pipe_in("a\nA\nb\nB\n") + .succeeds() + .stdout_is("a\nA\nb\nB\n"); +} + +#[test] +fn test_locale_c_byte_order() { + // Test case for issue: C locale should use ASCII byte order + // In C locale: A < B < a < b (uppercase before lowercase) + new_ucmd!() + .env("LC_ALL", "C") + .pipe_in("a\nA\nb\nB\n") + .succeeds() + .stdout_is("A\nB\na\nb\n"); +} + +#[test] +fn test_locale_posix_byte_order() { + // POSIX locale should behave like C locale + new_ucmd!() + .env("LC_ALL", "POSIX") + .pipe_in("a\nA\nb\nB\n") + .succeeds() + .stdout_is("A\nB\na\nb\n"); +} + +#[test] +fn test_locale_with_ignore_case_flag() { + // When -f (ignore case) is used, the comparison uses custom_str_cmp + // which converts to uppercase for comparison. With -f flag, all letters + // are treated as equivalent regardless of case, so original order is preserved + // for equal keys (stable sort behavior within equal elements). + // Note: This may differ slightly from GNU in tie-breaking behavior. + let result = new_ucmd!() + .env("LC_ALL", "en_US.UTF-8") + .arg("-f") + .pipe_in("a\nA\nb\nB\n") + .succeeds(); + + // Verify that a/A come before b/B (case-insensitive grouping works) + let output = result.stdout_str(); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(lines.len(), 4); + // a and A should come before b and B + let a_positions: Vec = lines + .iter() + .enumerate() + .filter(|(_, l)| **l == "a" || **l == "A") + .map(|(i, _)| i) + .collect(); + let b_positions: Vec = lines + .iter() + .enumerate() + .filter(|(_, l)| **l == "b" || **l == "B") + .map(|(i, _)| i) + .collect(); + assert!( + a_positions + .iter() + .all(|&a| b_positions.iter().all(|&b| a < b)), + "All 'a'/'A' should come before 'b'/'B' with -f flag" + ); +} + +#[test] +fn test_locale_complex_utf8_sorting() { + // More complex test with mixed case and special characters + // In en_US.UTF-8, should respect locale collation rules + // Locale collation is case-insensitive by default, with lowercase < uppercase for same base letter + let input = "zebra\nApple\napple\nBanana\nbanana\nZebra\n"; + + new_ucmd!() + .env("LC_ALL", "en_US.UTF-8") + .pipe_in(input) + .succeeds() + .stdout_is("apple\nApple\nbanana\nBanana\nzebra\nZebra\n"); +} + /* spell-checker: enable */ diff --git a/tests/by-util/test_tac.rs b/tests/by-util/test_tac.rs index be2b89cae53..1fc42d1c83f 100644 --- a/tests/by-util/test_tac.rs +++ b/tests/by-util/test_tac.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa +// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa bbaaa aaabc bcdddd cddddaaabc xyzabc abcxyzabc nbbaaa #[cfg(target_os = "linux")] use uutests::at_and_ucmd; use uutests::new_ucmd; @@ -347,3 +347,72 @@ fn test_stdin_bad_tmpdir_fallback() { .succeeds() .stdout_is("c\nb\na\n"); } + +#[test] +fn test_regex_or_operator() { + new_ucmd!() + .args(&["-r", "-s", r"[^x]\|x"]) + .pipe_in("abc") + .succeeds() + .stdout_is("cba"); +} + +#[test] +fn test_unescaped_middle_anchor() { + new_ucmd!() + .args(&["-r", "-s", r"1^2"]) + .pipe_in("111^222") + .succeeds() + .stdout_is("22111^2"); + + new_ucmd!() + .args(&["-r", "-s", r"a$b"]) + .pipe_in("aaa$bbb") + .succeeds() + .stdout_is("bbaaa$b"); +} + +#[test] +fn test_escaped_middle_anchor() { + new_ucmd!() + .args(&["-r", "-s", r"c\^b"]) + .pipe_in("aaabc^bcdddd") + .succeeds() + .stdout_is("cddddaaabc^b"); + + new_ucmd!() + .args(&["-r", "-s", r"c\$b"]) + .pipe_in("aaabc$bcdddd") + .succeeds() + .stdout_is("cddddaaabc$b"); +} + +#[test] +fn test_regular_start_anchor() { + new_ucmd!() + .args(&["-r", "-s", r"^abc"]) + .pipe_in("xyzabc123abc") + .succeeds() + .stdout_is("xyzabc123abc"); + + new_ucmd!() + .args(&["-r", "-s", r"^b"]) + .pipe_in("aaa\nbbb\nccc\n") + .succeeds() + .stdout_is("bb\nccc\naaa\nb"); +} + +#[test] +fn test_regular_end_anchor() { + new_ucmd!() + .args(&["-r", "-s", r"abc$"]) + .pipe_in("123abcxyzabc") + .succeeds() + .stdout_is("123abcxyzabc"); + + new_ucmd!() + .args(&["-r", "-s", r"b$"]) + .pipe_in("aaa\nbbb\nccc\n") + .succeeds() + .stdout_is("\nccc\nbbaaa\nb"); +} diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index 9d4a270e2b2..369ac9ee654 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -2742,12 +2742,12 @@ fn test_fifo() { not(target_os = "openbsd") ))] fn test_fifo_with_pid() { - use std::process::Command; + use std::process::{Command, Stdio}; let (at, mut ucmd) = at_and_ucmd!(); at.mkfifo("FIFO"); - let mut dummy = Command::new("sh").spawn().unwrap(); + let mut dummy = Command::new("sh").stdin(Stdio::null()).spawn().unwrap(); let pid = dummy.id(); let mut child = ucmd diff --git a/tests/by-util/test_test.rs b/tests/by-util/test_test.rs index 21ea1893e99..d7f8215bdb5 100644 --- a/tests/by-util/test_test.rs +++ b/tests/by-util/test_test.rs @@ -314,6 +314,26 @@ fn test_invalid_utf8_integer_compare() { .stderr_is("test: invalid integer $'fo\\x80o'\n"); } +#[test] +fn test_integer_whitespace_stripping() { + new_ucmd!().args(&["42", "-eq", " 42 "]).succeeds(); + new_ucmd!().args(&["42", "-eq", " 42"]).succeeds(); + new_ucmd!().args(&["42", "-eq", "42 "]).succeeds(); + new_ucmd!().args(&[" 42 ", "-eq", "42"]).succeeds(); + + new_ucmd!().args(&["42", "-eq", "\t42"]).succeeds(); + new_ucmd!().args(&["42", "-eq", "\n42"]).succeeds(); + new_ucmd!().args(&["42", "-eq", "\x0b42"]).succeeds(); // Vertical tab + new_ucmd!().args(&["42", "-eq", "\x0c42"]).succeeds(); // Form feed + new_ucmd!().args(&["42", "-eq", "\r42"]).succeeds(); +} + +#[test] +fn test_isatty_whitespace_stripping() { + new_ucmd!().args(&["-t", " 0 "]).fails_with_code(1); + new_ucmd!().args(&["-t", "\n0\t"]).fails_with_code(1); +} + #[test] #[cfg(unix)] fn test_file_is_itself() { diff --git a/tests/by-util/test_timeout.rs b/tests/by-util/test_timeout.rs index adce254d5ec..a9b9b29dbc0 100644 --- a/tests/by-util/test_timeout.rs +++ b/tests/by-util/test_timeout.rs @@ -8,7 +8,8 @@ use std::time::Duration; use rstest::rstest; use uucore::display::Quotable; -use uutests::new_ucmd; +use uutests::util::TestScenario; +use uutests::{new_ucmd, util_name}; #[test] fn test_invalid_arg() { @@ -235,3 +236,53 @@ fn test_command_cannot_invoke() { // Try to execute a directory (should give permission denied or similar) new_ucmd!().args(&["1", "/"]).fails_with_code(126); } + +#[test] +#[cfg(unix)] +fn test_sigchld_ignored_by_parent() { + let ts = TestScenario::new(util_name!()); + let bin_path = ts.bin_path.to_string_lossy(); + ts.ucmd() + .args(&[ + "10", + "sh", + "-c", + &format!("trap '' CHLD; exec {bin_path} timeout 1 true"), + ]) + .succeeds(); +} + +#[test] +#[cfg(unix)] +fn test_with_background_child() { + new_ucmd!() + .args(&[".5", "sh", "-c", "sleep .1 & sleep 2"]) + .fails_with_code(124) + .no_stdout(); +} + +#[test] +#[cfg(unix)] +fn test_forward_sigint_to_child() { + let mut cmd = new_ucmd!() + .args(&[ + "10", + "sh", + "-c", + "trap 'echo got_int; exit 42' INT; sleep 5", + ]) + .run_no_wait(); + cmd.delay(100); + cmd.kill_with_custom_signal(nix::sys::signal::Signal::SIGINT); + cmd.make_assertion() + .is_not_alive() + .with_current_output() + .stdout_contains("got_int"); +} + +#[test] +fn test_foreground_signal0_kill_after() { + new_ucmd!() + .args(&["--foreground", "-s0", "-k.1", ".1", "sleep", "10"]) + .fails_with_code(137); +} diff --git a/tests/by-util/test_unexpand.rs b/tests/by-util/test_unexpand.rs index fdba510c300..d29fecfd360 100644 --- a/tests/by-util/test_unexpand.rs +++ b/tests/by-util/test_unexpand.rs @@ -283,6 +283,15 @@ fn test_one_nonexisting_file() { .stderr_contains("asdf.txt: No such file or directory"); } +#[test] +#[cfg(all(target_os = "linux", not(target_env = "musl")))] +fn test_read_error() { + new_ucmd!() + .arg("/proc/self/mem") + .fails() + .stderr_contains("unexpand: /proc/self/mem: Input/output error"); +} + #[test] #[cfg(target_os = "linux")] fn test_non_utf8_filename() { diff --git a/tests/fixtures/hashsum/b2sum.checkfile b/tests/fixtures/b2sum/b2sum.checkfile similarity index 100% rename from tests/fixtures/hashsum/b2sum.checkfile rename to tests/fixtures/b2sum/b2sum.checkfile diff --git a/tests/fixtures/hashsum/b2sum.expected b/tests/fixtures/b2sum/b2sum.expected similarity index 100% rename from tests/fixtures/hashsum/b2sum.expected rename to tests/fixtures/b2sum/b2sum.expected diff --git a/tests/fixtures/b2sum/input.txt b/tests/fixtures/b2sum/input.txt new file mode 100644 index 00000000000..8c01d89ae06 --- /dev/null +++ b/tests/fixtures/b2sum/input.txt @@ -0,0 +1 @@ +hello, world \ No newline at end of file diff --git a/tests/fixtures/md5sum/input.txt b/tests/fixtures/md5sum/input.txt new file mode 100644 index 00000000000..8c01d89ae06 --- /dev/null +++ b/tests/fixtures/md5sum/input.txt @@ -0,0 +1 @@ +hello, world \ No newline at end of file diff --git a/tests/fixtures/hashsum/md5.checkfile b/tests/fixtures/md5sum/md5.checkfile similarity index 100% rename from tests/fixtures/hashsum/md5.checkfile rename to tests/fixtures/md5sum/md5.checkfile diff --git a/tests/fixtures/hashsum/md5.expected b/tests/fixtures/md5sum/md5.expected similarity index 100% rename from tests/fixtures/hashsum/md5.expected rename to tests/fixtures/md5sum/md5.expected diff --git a/tests/fixtures/sha1sum/input.txt b/tests/fixtures/sha1sum/input.txt new file mode 100644 index 00000000000..8c01d89ae06 --- /dev/null +++ b/tests/fixtures/sha1sum/input.txt @@ -0,0 +1 @@ +hello, world \ No newline at end of file diff --git a/tests/fixtures/hashsum/sha1.checkfile b/tests/fixtures/sha1sum/sha1.checkfile similarity index 100% rename from tests/fixtures/hashsum/sha1.checkfile rename to tests/fixtures/sha1sum/sha1.checkfile diff --git a/tests/fixtures/hashsum/sha1.expected b/tests/fixtures/sha1sum/sha1.expected similarity index 100% rename from tests/fixtures/hashsum/sha1.expected rename to tests/fixtures/sha1sum/sha1.expected diff --git a/tests/fixtures/sha224sum/input.txt b/tests/fixtures/sha224sum/input.txt new file mode 100644 index 00000000000..8c01d89ae06 --- /dev/null +++ b/tests/fixtures/sha224sum/input.txt @@ -0,0 +1 @@ +hello, world \ No newline at end of file diff --git a/tests/fixtures/hashsum/sha224.checkfile b/tests/fixtures/sha224sum/sha224.checkfile similarity index 100% rename from tests/fixtures/hashsum/sha224.checkfile rename to tests/fixtures/sha224sum/sha224.checkfile diff --git a/tests/fixtures/hashsum/sha224.expected b/tests/fixtures/sha224sum/sha224.expected similarity index 100% rename from tests/fixtures/hashsum/sha224.expected rename to tests/fixtures/sha224sum/sha224.expected diff --git a/tests/fixtures/sha256sum/binary.png b/tests/fixtures/sha256sum/binary.png new file mode 100644 index 00000000000..6c4161338f2 Binary files /dev/null and b/tests/fixtures/sha256sum/binary.png differ diff --git a/tests/fixtures/hashsum/binary.sha256.checkfile b/tests/fixtures/sha256sum/binary.sha256.checkfile similarity index 100% rename from tests/fixtures/hashsum/binary.sha256.checkfile rename to tests/fixtures/sha256sum/binary.sha256.checkfile diff --git a/tests/fixtures/hashsum/binary.sha256.expected b/tests/fixtures/sha256sum/binary.sha256.expected similarity index 100% rename from tests/fixtures/hashsum/binary.sha256.expected rename to tests/fixtures/sha256sum/binary.sha256.expected diff --git a/tests/fixtures/sha256sum/input.txt b/tests/fixtures/sha256sum/input.txt new file mode 100644 index 00000000000..8c01d89ae06 --- /dev/null +++ b/tests/fixtures/sha256sum/input.txt @@ -0,0 +1 @@ +hello, world \ No newline at end of file diff --git a/tests/fixtures/hashsum/sha256.checkfile b/tests/fixtures/sha256sum/sha256.checkfile similarity index 100% rename from tests/fixtures/hashsum/sha256.checkfile rename to tests/fixtures/sha256sum/sha256.checkfile diff --git a/tests/fixtures/hashsum/sha256.expected b/tests/fixtures/sha256sum/sha256.expected similarity index 100% rename from tests/fixtures/hashsum/sha256.expected rename to tests/fixtures/sha256sum/sha256.expected diff --git a/tests/fixtures/sha384sum/input.txt b/tests/fixtures/sha384sum/input.txt new file mode 100644 index 00000000000..8c01d89ae06 --- /dev/null +++ b/tests/fixtures/sha384sum/input.txt @@ -0,0 +1 @@ +hello, world \ No newline at end of file diff --git a/tests/fixtures/hashsum/sha384.checkfile b/tests/fixtures/sha384sum/sha384.checkfile similarity index 100% rename from tests/fixtures/hashsum/sha384.checkfile rename to tests/fixtures/sha384sum/sha384.checkfile diff --git a/tests/fixtures/hashsum/sha384.expected b/tests/fixtures/sha384sum/sha384.expected similarity index 100% rename from tests/fixtures/hashsum/sha384.expected rename to tests/fixtures/sha384sum/sha384.expected diff --git a/tests/fixtures/sha512sum/input.txt b/tests/fixtures/sha512sum/input.txt new file mode 100644 index 00000000000..8c01d89ae06 --- /dev/null +++ b/tests/fixtures/sha512sum/input.txt @@ -0,0 +1 @@ +hello, world \ No newline at end of file diff --git a/tests/fixtures/sha512sum/sha512.checkfile b/tests/fixtures/sha512sum/sha512.checkfile new file mode 100644 index 00000000000..41a55cabbb5 --- /dev/null +++ b/tests/fixtures/sha512sum/sha512.checkfile @@ -0,0 +1 @@ +8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9 input.txt diff --git a/tests/fixtures/sha512sum/sha512.expected b/tests/fixtures/sha512sum/sha512.expected new file mode 100644 index 00000000000..fd817368620 --- /dev/null +++ b/tests/fixtures/sha512sum/sha512.expected @@ -0,0 +1 @@ +8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9 \ No newline at end of file diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected index 59541af3252..a781a36bba8 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected @@ -21,10 +21,10 @@ CARAvan 8.013 45 46.89 -576,446.88800000 -576,446.890 4567. 37800 +576,446.88800000 +576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug index b7b76e58986..a00067b1ee6 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug @@ -67,18 +67,18 @@ __ 46.89 _____ _____ -576,446.88800000 -___ -________________ -576,446.890 -___ -___________ 4567. _____ ____________________ >>>>37800 _____ _________ +576,446.88800000 +___ +________________ +576,446.890 +___ +___________ 4798908.340000000000 ____________________ ____________________ diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected index 0ccdd84c059..36eeda637f7 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected @@ -24,10 +24,10 @@ CARAvan 8.013 45 46.89 -576,446.890 -576,446.88800000 4567. 37800 +576,446.88800000 +576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug index 66a98b20879..3fba8903042 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug @@ -50,14 +50,14 @@ _____ __ 46.89 _____ -576,446.890 -___ -576,446.88800000 -___ 4567. _____ >>>>37800 _____ +576,446.88800000 +___ +576,446.890 +___ 4798908.340000000000 ____________________ 4798908.45 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected index cd4256c5f46..cb27c6664ce 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected @@ -11,9 +11,10 @@ 8.013 45 46.89 -576,446.890 4567. 37800 +576,446.88800000 +576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug index 663a4b3a918..dd6e8dfcc67 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug @@ -24,12 +24,14 @@ _____ __ 46.89 _____ -576,446.890 -___ 4567. _____ >>>>37800 _____ +576,446.88800000 +___ +576,446.890 +___ 4798908.340000000000 ____________________ 4798908.45 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected index 97e261f1452..bbce169347f 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected @@ -1,9 +1,10 @@ 4798908.8909800 4798908.45 4798908.340000000000 +576,446.890 +576,446.88800000 37800 4567. -576,446.890 46.89 45 8.013 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug index 01f7abf5bf2..4b01a840618 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug @@ -4,12 +4,14 @@ _______________ __________ 4798908.340000000000 ____________________ +576,446.890 +___ +576,446.88800000 +___ >>>>37800 _____ 4567. _____ -576,446.890 -___ 46.89 _____ 45 diff --git a/tests/fixtures/sort/multiple_decimals_numeric.expected b/tests/fixtures/sort/multiple_decimals_numeric.expected index 8f42e7ce5da..3ef4d22e881 100644 --- a/tests/fixtures/sort/multiple_decimals_numeric.expected +++ b/tests/fixtures/sort/multiple_decimals_numeric.expected @@ -21,8 +21,6 @@ CARAvan 8.013 45 46.89 -576,446.88800000 -576,446.890 4567..457 4567. 4567.1 @@ -30,6 +28,8 @@ CARAvan 37800 45670.89079.098 45670.89079.1 +576,446.88800000 +576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/multiple_decimals_numeric.expected.debug b/tests/fixtures/sort/multiple_decimals_numeric.expected.debug index 948c4869c32..0ae6d2958a5 100644 --- a/tests/fixtures/sort/multiple_decimals_numeric.expected.debug +++ b/tests/fixtures/sort/multiple_decimals_numeric.expected.debug @@ -67,12 +67,6 @@ __ 46.89 _____ _____ -576,446.88800000 -___ -________________ -576,446.890 -___ -___________ >>>>>>>>>>4567..457 _____ ___________________ @@ -94,6 +88,12 @@ _____________________ >>>>>>45670.89079.1 ___________ ___________________ +576,446.88800000 +___ +________________ +576,446.890 +___ +___________ 4798908.340000000000 ____________________ ____________________ diff --git a/tests/fixtures/sort/multiple_groupings_numeric.expected b/tests/fixtures/sort/multiple_groupings_numeric.expected new file mode 100644 index 00000000000..a6daab83676 --- /dev/null +++ b/tests/fixtures/sort/multiple_groupings_numeric.expected @@ -0,0 +1,15 @@ + + + +CARAvan + 1.234 +2.000 +2.000,50 +22 +23,. +111 + 210 +1,234 +12,34 + 1,999.99 + 2,000 diff --git a/tests/fixtures/sort/multiple_groupings_numeric.expected.debug b/tests/fixtures/sort/multiple_groupings_numeric.expected.debug new file mode 100644 index 00000000000..57a4ae01b9a --- /dev/null +++ b/tests/fixtures/sort/multiple_groupings_numeric.expected.debug @@ -0,0 +1,45 @@ + +^ no match for key +^ no match for key + +^ no match for key +^ no match for key + +^ no match for key +^ no match for key +CARAvan +^ no match for key +_______ +>1.234 + _____ +______ +2.000 +_____ +_____ +2.000,50 +_____ +________ +22 +__ +__ +23,. +__ +____ +111 +___ +___ +>210 + ___ +____ +1,234 +_ +_____ +12,34 +__ +_____ +>>1,999.99 + _ +__________ +>>>2,000 + _ +________ diff --git a/tests/fixtures/sort/multiple_groupings_numeric.txt b/tests/fixtures/sort/multiple_groupings_numeric.txt new file mode 100644 index 00000000000..264403a79ee --- /dev/null +++ b/tests/fixtures/sort/multiple_groupings_numeric.txt @@ -0,0 +1,15 @@ +1,234 +12,34 + + 1.234 +2.000 + 2,000 +111 + + +CARAvan +22 +23,. + 210 + 1,999.99 +2.000,50 \ No newline at end of file diff --git a/tests/tests.rs b/tests/tests.rs index 9ffdfd4a312..d2ecbca10f1 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -68,6 +68,34 @@ mod test_cksum; #[path = "by-util/test_comm.rs"] mod test_comm; +#[cfg(feature = "b2sum")] +#[path = "by-util/test_b2sum.rs"] +mod test_b2sum; + +#[cfg(feature = "md5sum")] +#[path = "by-util/test_md5sum.rs"] +mod test_md5sum; + +#[cfg(feature = "sha1sum")] +#[path = "by-util/test_sha1sum.rs"] +mod test_sha1sum; + +#[cfg(feature = "sha224sum")] +#[path = "by-util/test_sha224sum.rs"] +mod test_sha224sum; + +#[cfg(feature = "sha256sum")] +#[path = "by-util/test_sha256sum.rs"] +mod test_sha256sum; + +#[cfg(feature = "sha384sum")] +#[path = "by-util/test_sha384sum.rs"] +mod test_sha384sum; + +#[cfg(feature = "sha512sum")] +#[path = "by-util/test_sha512sum.rs"] +mod test_sha512sum; + #[cfg(feature = "cp")] #[path = "by-util/test_cp.rs"] mod test_cp; diff --git a/tests/uutests/Cargo.toml b/tests/uutests/Cargo.toml index e73ea59022d..57eea11aed3 100644 --- a/tests/uutests/Cargo.toml +++ b/tests/uutests/Cargo.toml @@ -36,6 +36,8 @@ uucore = { workspace = true, features = [ [target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies] [target.'cfg(unix)'.dependencies] -nix = { workspace = true, features = ["process", "signal", "user", "term"] } -rlimit = "0.10.1" +nix = { workspace = true, features = ["process", "signal", "term", "user"] } +rlimit = { workspace = true } + +[target.'cfg(all(unix, not(any(target_os = "macos", target_os = "openbsd"))))'.dependencies] xattr = { workspace = true } diff --git a/util/android-commands.sh b/util/android-commands.sh index b87d7050b0c..63adf0ec422 100755 --- a/util/android-commands.sh +++ b/util/android-commands.sh @@ -534,7 +534,7 @@ snapshot() { # We need to install nextest via cargo currently, since there is no pre-built binary for android x86 # explicitly set CARGO_TARGET_DIR as otherwise a random generated tmp directory is used, # which prevents incremental build for the retries. - command="export CARGO_TERM_COLOR=always && export CARGO_TARGET_DIR=\"cargo_install_target_dir\" && cargo install cargo-nextest" + command="export CARGO_TERM_COLOR=always && export CARGO_TARGET_DIR=\"cargo_install_target_dir\" && cargo install cargo-nextest --locked" run_with_retry 3 run_command_via_ssh "$command" return_code=$? diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 7a9f45c385b..0d1c6d9e114 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -15,7 +15,6 @@ command -v gsed && sed(){ gsed "$@";} SED=$(command -v gsed||command -v sed) # for find...exec... SYSTEM_TIMEOUT=$(command -v timeout) -SYSTEM_YES=$(command -v yes) ME="${0}" ME_dir="$(dirname -- "$(readlink -fm -- "${ME}")")" @@ -187,13 +186,6 @@ sed -i "s|cannot create regular file 'no-such/': Not a directory|'no-such/' is n # Our message is better sed -i "s|warning: unrecognized escape|warning: incomplete hex escape|" tests/stat/stat-printf.pl -sed -i 's|timeout |'"${SYSTEM_TIMEOUT}"' |' tests/tail/follow-stdin.sh - -# trap_sigpipe_or_skip_ fails with uutils tools because of a bug in -# timeout/yes (https://github.com/uutils/coreutils/issues/7252), so we use -# system's yes/timeout to make sure the tests run (instead of being skipped). -sed -i 's|\(trap .* \)timeout\( .* \)yes|'"\1${SYSTEM_TIMEOUT}\2${SYSTEM_YES}"'|' init.cfg - # Remove dup of /usr/bin/ and /usr/local/bin/ when executed several times grep -rlE '/usr/bin/\s?/usr/bin' init.cfg tests/* | xargs -r "${SED}" -Ei 's|/usr/bin/\s?/usr/bin/|/usr/bin/|g' grep -rlE '/usr/local/bin/\s?/usr/local/bin' init.cfg tests/* | xargs -r "${SED}" -Ei 's|/usr/local/bin/\s?/usr/local/bin/|/usr/local/bin/|g' diff --git a/util/show-utils.sh b/util/show-utils.sh index 3cc48794030..66266e7a992 100755 --- a/util/show-utils.sh +++ b/util/show-utils.sh @@ -14,8 +14,8 @@ ME_parent_dir_abs="$("${REALPATH}" -mP -- "${ME_parent_dir}" || "${REALPATH}" -- # refs: , -# default ("Tier 1" cross-platform) utility list -default_utils="base32 base64 basename cat cksum comm cp cut date dircolors dirname echo env expand expr factor false fmt fold hashsum head join link ln ls mkdir mktemp more mv nl od paste printenv printf ptx pwd readlink realpath rm rmdir seq shred shuf sleep sort split sum tac tail tee test tr true truncate tsort unexpand uniq wc yes" +# default utility list +default_utils=$(sed -n '/feat_common_core = \[/,/\]/p' Cargo.toml | sed '1d' |tr -d '],"\n') # $(sed -n '/feat_Tier1 = \[/,/\]/p' Cargo.toml | sed '1d;2d' |tr -d '],"\n') too? project_main_dir="${ME_parent_dir_abs}" # printf 'project_main_dir="%s"\n' "${project_main_dir}"