Skip to content

Commit 17cfa01

Browse files
fmeum authored and copybara-github committed
Default to a UTF-8 locale in Java stub template
On non-macOS Unix, without any locale variable set, OpenJDK defaults to using ASCII rather than UTF-8 as the encoding for file system paths (i.e., the value of the `sun.jnu.encoding` property). Fixes bazelbuild#15106. Closes bazelbuild#15159. PiperOrigin-RevId: 445520597
1 parent 793ae52 commit 17cfa01

File tree

2 files changed

+52
-0
lines changed

2 files changed

+52
-0
lines changed

src/main/java/com/google/devtools/build/lib/bazel/rules/java/java_stub_template.txt

+21
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,16 @@ function is_macos() {
9494
[[ "${OSTYPE}" =~ darwin* ]]
9595
}
9696

97+
function available_utf8_locale() {
  # Prints the name of a UTF-8 locale that can be used as LC_CTYPE on this
  # system, or prints nothing if none of the candidates is usable.
  #
  # Outputs: the chosen locale name on stdout (at most one line).
  # Returns: always 0; callers distinguish "none found" by the empty output.
  #
  # Both C.UTF-8 and en_US.UTF-8 do not cause any language-specific effects
  # when set as LC_CTYPE, but neither is certain to exist on all systems.
  # Candidates are probed in order of preference: a candidate is accepted only
  # if `locale charmap` reports "UTF-8" while it is selected. Diagnostics from
  # `locale` are intentionally discarded, since a missing candidate is an
  # expected outcome and not an error.
  local candidate
  for candidate in C.UTF-8 en_US.UTF-8; do
    if [[ "$(LC_CTYPE="${candidate}" locale charmap 2>/dev/null)" == "UTF-8" ]]; then
      echo "${candidate}"
      return 0
    fi
  done
  return 0
}
106+
97107
# Parse arguments sequentially until the first unrecognized arg is encountered.
98108
# Scan the remaining args for --wrapper_script_flag=X options and process them.
99109
ARGS=()
@@ -362,6 +372,17 @@ if [ -z "$CLASSPATH_LIMIT" ]; then
362372
is_windows && CLASSPATH_LIMIT=7000 || CLASSPATH_LIMIT=120000
363373
fi
364374

375+
# On non-macOS Unix, without any locale variable set, the JVM would use ASCII
# rather than UTF-8 as the encoding for file system paths. Default LC_CTYPE to
# an available UTF-8 locale in that case, but never override a choice the
# caller has already made.
if ! is_macos; then
  # ${VAR+x} expands to "x" if VAR is set (even to the empty string) and to
  # nothing otherwise, so this only matches when all three are truly unset.
  if [[ -z "${LC_CTYPE+x}" && -z "${LC_ALL+x}" && -z "${LANG+x}" ]]; then
    UTF8_LOCALE=$(available_utf8_locale)
    # Empty output means no known UTF-8 locale exists; leave the environment
    # untouched in that case.
    if [[ -n "$UTF8_LOCALE" ]]; then
      export LC_CTYPE="$UTF8_LOCALE"
    fi
  fi
fi
385+
365386
if (("${#CLASSPATH}" > ${CLASSPATH_LIMIT})); then
366387
export JACOCO_IS_JAR_WRAPPED=1
367388
create_and_run_classpath_jar

src/test/shell/bazel/unicode_filenames_test.sh

+31
Original file line numberDiff line numberDiff line change
@@ -206,4 +206,35 @@ function test_utf8_source_artifact_in_bep() {
206206
expect_log '"name":"pkg/srcs/ünïcödë fïlë.txt"'
207207
}
208208

209+
function test_utf8_filename_in_java_test() {
  # Builds and runs a java_test whose main method creates a temporary file with
  # a non-ASCII name prefix, and fails unless `bazel test` succeeds.
  #
  # Intentionally do not check for available locales: Either C.UTF-8 or
  # en_US.UTF-8 should exist on all CI machines - if not, we want to learn about
  # this so that the Java stub template can be adapted accordingly.
  #
  # NOTE(review): `fail` and TEST_log are not defined in this view; presumably
  # they come from the shared shell test harness - confirm.

  touch WORKSPACE
  mkdir pkg

  cat >pkg/BUILD <<'EOF'
java_test(
    name = "Test",
    srcs = ["Test.java"],
    main_class = "Test",
    use_testrunner = False,
)
EOF

  cat >pkg/Test.java <<'EOF'
import java.nio.file.Files;
import java.io.IOException;

class Test {
  public static void main(String[] args) throws IOException {
    Files.createTempFile("æøå", null);
  }
}
EOF

  # Quote the log path so it survives spaces or glob characters.
  bazel test //pkg:Test --test_output=errors 2>"$TEST_log" || fail "Test should pass"
}
239+
209240
run_suite "Tests for handling of Unicode filenames"

0 commit comments

Comments
 (0)