Skip to content

Commit b27d83c

Browse files
committed
Fix expansion of multibyte IFS characters
Closes att#13. Previously, the `varsub` function, which performs the macro expansion of `$param`, `${param}`, and `${param op word}`, expanded the internal field separator (IFS) incorrectly when it was a multibyte character. The separator was truncated because the code assumed that the IFS would never be larger than a single byte. This change fixes the issue by tracking how many bytes the IFS character occupies and ensuring that all of those bytes are written during expansion and substitution.
1 parent 82d686d commit b27d83c

File tree

3 files changed

+47
-5
lines changed

3 files changed

+47
-5
lines changed

src/cmd/ksh93/meson.build

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ all_tests = [
8888
['alias'], ['append'], ['arith'], ['arrays'], ['arrays2'], ['attributes'],
8989
['basic', 90], ['bracket'], ['builtins'], ['case'], ['comvar'],
9090
['comvario'], ['coprocess', 50], ['cubetype'], ['directoryfd'], ['enum'],
91-
['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'],
91+
['exit'], ['expand'], ['functions'], ['glob'], ['grep'], ['heredoc'], ['ifs'],
9292
['io'], ['leaks'], ['locale'], ['math', 50], ['nameref'], ['namespace'],
9393
['modifiers'], ['options'], ['path'], ['pointtype'], ['quoting'],
9494
['quoting2'], ['readcsv'], ['recttype'], ['restricted'], ['return'], ['select'],

src/cmd/ksh93/sh/macro.c

+16-4
Original file line numberDiff line numberDiff line change
@@ -1663,7 +1663,13 @@ static_fn bool varsub(Mac_t *mp) {
16631663
int match[2 * (MATCH_MAX + 1)], index;
16641664
int nmatch, nmatch_prev, vsize_last, tsize;
16651665
char *vlast = NULL, *oldv;
1666-
d = (mode == '@' ? ' ' : mp->ifs);
1666+
char *ifs_bytes = NULL;
1667+
if (mode == '@') {
1668+
d = ' ';
1669+
} else {
1670+
d = mp->ifs;
1671+
ifs_bytes = mp->ifsp;
1672+
}
16671673
while (1) {
16681674
if (!v) v = "";
16691675
if (c == '/' || c == '#' || c == '%') {
@@ -1792,10 +1798,16 @@ static_fn bool varsub(Mac_t *mp) {
17921798
mp->atmode = mode == '@';
17931799
mp->pattern = oldpat;
17941800
} else if (d) {
1795-
if (mp->sp) {
1796-
sfputc(mp->sp, d);
1801+
Sfio_t *sfio_ptr = (mp->sp) ? mp->sp : stkp;
1802+
1803+
// Handle multi-byte characters being used for the internal
1804+
// field separator (IFS).
1805+
if (ifs_bytes) {
1806+
for (int i = 0; i < mbsize(ifs_bytes); i++) {
1807+
sfputc(sfio_ptr, ifs_bytes[i]);
1808+
}
17971809
} else {
1798-
sfputc(stkp, d);
1810+
sfputc(sfio_ptr, d);
17991811
}
18001812
}
18011813
}

src/cmd/ksh93/tests/ifs.sh

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# These are the tests for the internal field separator (IFS).
2+
3+
IFS=e
4+
set : :
5+
[[ "$*" == ":e:" ]] || log_error "IFS failed" ":e:" "$*"
6+
7+
IFS='|' read first second third <<< 'one|two|three'
8+
[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
9+
[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
10+
[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"
11+
12+
# Multi-byte character checks will only work if UTF-8 inputs are enabled
13+
if [ "${LANG}" = "C.UTF-8" ]
14+
then
15+
# 2-byte Latin accented e character
16+
IFS=é
17+
set : :
18+
[[ "$*" == ":é:" ]] || log_error "IFS failed with multibyte character" ":é:" "$*"
19+
20+
# 4-byte Roman sestertius character
21+
IFS=𐆘 read first second third <<< 'one𐆘two𐆘three'
22+
[[ "${first}" == "one" ]] || log_error "IFS failed" "one" "${first}"
23+
[[ "${second}" == "two" ]] || log_error "IFS failed" "two" "${second}"
24+
[[ "${third}" == "three" ]] || log_error "IFS failed" "three" "${third}"
25+
26+
# Ensure subshells don't get corrupted when IFS becomes multibyte character
27+
expected_output=$(printf ":é:\\ntrap -- 'echo end' EXIT\\nend")
28+
output=$(LANG=C.UTF-8; IFS=é; set : :; echo "$*"; trap "echo end" EXIT; LC_ALL=C; unset LC_ALL; trap)
29+
[[ "${output}" == "${expected_output}" ]] || log_error "IFS subshell failed" "${expected_output}" "${output}"
30+
fi

0 commit comments

Comments
 (0)