Skip to content

Commit bc49dd8

Browse files
committed
Fix expansion of multibyte IFS characters
Closes att#13. Previously, the `varsub` function, which performs the macro expansion of `$param`, `${param}`, and `${param op word}`, expanded the internal field separator (IFS) incorrectly when it was a multibyte character. The separator was truncated because the code assumed that the IFS character would never be larger than a single byte. This change fixes the issue by determining how many bytes the IFS character occupies and ensuring that all of those bytes are written during expansion and substitution.
1 parent 82d686d commit bc49dd8

File tree

3 files changed

+45
-4
lines changed

3 files changed

+45
-4
lines changed

src/cmd/ksh93/meson.build

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ all_tests = [
8888
['alias'], ['append'], ['arith'], ['arrays'], ['arrays2'], ['attributes'],
8989
['basic', 90], ['bracket'], ['builtins'], ['case'], ['comvar'],
9090
['comvario'], ['coprocess', 50], ['cubetype'], ['directoryfd'], ['enum'],
91-
['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'],
91+
['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'], ['ifs'],
9292
['io'], ['leaks'], ['locale'], ['math', 50], ['nameref'], ['namespace'],
9393
['modifiers'], ['options'], ['path'], ['pointtype'], ['quoting'],
9494
['quoting2'], ['readcsv'], ['recttype'], ['restricted'], ['return'], ['select'],

src/cmd/ksh93/sh/macro.c

+14-3
Original file line numberDiff line numberDiff line change
@@ -1792,10 +1792,21 @@ static_fn bool varsub(Mac_t *mp) {
17921792
mp->atmode = mode == '@';
17931793
mp->pattern = oldpat;
17941794
} else if (d) {
1795-
if (mp->sp) {
1796-
sfputc(mp->sp, d);
1795+
Sfio_t *sfio_ptr = (mp->sp) ? mp->sp : stkp;
1796+
1797+
// We know from above that if we are not performing @-expansion
1798+
// then we assigned `d` the value of `mp->ifs`, here we check
1799+
// whether or not we have a valid string of IFS characters to
1800+
// write as it is possible for `d` to be set to `mp->ifs` and
1801+
// yet `mp->ifsp` to be NULL.
1802+
if (mode != '@' && mp->ifsp) {
1803+
// Handle multi-byte characters being used for the internal
1804+
// field separator (IFS).
1805+
for (int i = 0; i < mbsize(mp->ifsp); i++) {
1806+
sfputc(sfio_ptr, mp->ifsp[i]);
1807+
}
17971808
} else {
1798-
sfputc(stkp, d);
1809+
sfputc(sfio_ptr, d);
17991810
}
18001811
}
18011812
}

src/cmd/ksh93/tests/ifs.sh

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Tests for the internal field separator (IFS).
# Covers two uses of IFS: joining the positional parameters in "$*", and
# splitting input into fields with `read`. Each use is checked first with a
# single-byte separator and then, inside the locale guard below, with a
# multibyte separator.
2+
3+
IFS=e
4+
set : :
5+
[[ "$*" == ":e:" ]] || log_error "IFS failed" ":e:" "$*"
6+
7+
IFS='|' read -r first second third <<< 'one|two|three'
8+
[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
9+
[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
10+
[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"
11+
12+
# Multibyte character checks only work when a UTF-8 locale is in effect.
# The guard compares LC_ALL against exactly "en_US.UTF-8", so these checks
# are skipped under any other value of LC_ALL, including other UTF-8 locales.
13+
if [ "${LC_ALL}" = "en_US.UTF-8" ]
14+
then
15+
# 2-byte Latin accented "e" character used as the separator when joining "$*".
16+
IFS=é
17+
set : :
18+
[[ "$*" == ":é:" ]] || log_error "IFS failed with multibyte character" ":é:" "$*"
19+
20+
# 4-byte Roman sestertius character used as the separator when splitting with `read`.
21+
IFS=𐆘 read -r first second third <<< 'one𐆘two𐆘three'
22+
[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
23+
[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
24+
[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"
25+
26+
# Ensure subshells don't get corrupted when IFS becomes a multibyte character.
# The expected output is three lines: the joined "$*", the listing printed by
# the bare `trap`, and "end" written by the EXIT trap when the subshell exits.
# NOTE(review): the guard above tests LC_ALL, but the subshell assigns LANG;
# under POSIX locale precedence LC_ALL overrides LANG, so this assignment
# presumably has no effect here -- confirm.
27+
expected_output=$(printf ":é:\\ntrap -- 'echo end' EXIT\\nend")
28+
output=$(LANG=C.UTF-8; IFS=é; set : :; echo "$*"; trap "echo end" EXIT; trap)
29+
[[ "${output}" == "${expected_output}" ]] || log_error "IFS subshell failed" "${expected_output}" "${output}"
30+
fi

0 commit comments

Comments
 (0)