From 265e0779ceadc265f22463ac01386883e2f905ce Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Thu, 9 May 2019 13:33:35 -0400 Subject: [PATCH 1/5] glibc: add patch to use utf-8 when LANG is unset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the default builder to use utf-8 encoding. Previously, when LANG and LC_ALL were left unset, glibc would default to “C”. This is not good in many cases. As a result of this, I think we can get rid of a lot of uses of LC_ALL in Nixpkgs, where utf-8 was needed. These can be found by grepping for: (?:LC_ALL|LANG) *= *["']?(?:en_US|C)\.UTF-8["']? --- pkgs/development/libraries/glibc/common.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkgs/development/libraries/glibc/common.nix b/pkgs/development/libraries/glibc/common.nix index 33c8e5076daa7..7c4cda032e817 100644 --- a/pkgs/development/libraries/glibc/common.nix +++ b/pkgs/development/libraries/glibc/common.nix @@ -92,6 +92,11 @@ stdenv.mkDerivation ({ url = "https://salsa.debian.org/glibc-team/glibc/raw/49767c9f7de4828220b691b29de0baf60d8a54ec/debian/patches/localedata/locale-C.diff"; sha256 = "0irj60hs2i91ilwg5w7sqrxb695c93xg0ik7yhhq9irprd7fidn4"; }) + (fetchurl { + url = "https://bugs.debian.org/cgi-bin/bugreport.cgi?att=1;bug=874160;filename=0001-Default-to-C.UTF-8-on-setlocale-.-if-no-env-vars-are.patch;msg=5"; + name = "0001-Default-to-C.UTF-8-on-setlocale-.-if-no-env-vars-are.patch"; + sha256 = "05wnpib83ggqnr1c85ajrak00478hwalrb3q7pgnxlzs0axw2iyk"; + }) ] ++ lib.optional stdenv.isx86_64 ./fix-x64-abi.patch ++ lib.optional stdenv.hostPlatform.isMusl ./fix-rpc-types-musl-conflicts.patch From e05d59a1bc89316eb3f0a15a8a25690d2fb31b20 Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Thu, 9 May 2019 14:04:40 -0400 Subject: [PATCH 2/5] =?UTF-8?q?treewide:=20don=E2=80=99t=20assume=20we?= =?UTF-8?q?=E2=80=99re=20all=20americans?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit A couple of places set LANG=“en_US.UTF-8” to get Unicode working in the Nix builder. This is very very bad! Not everyone who uses Nix lives in the U.S. and is an English speaker. Instead we should use “C.UTF-8”, which is a UTF-8-capable version of the unlocalized C locale. Please use this one! The previous commit makes Glibc fall back to this locale when LANG is unset. This is in line with the Musl behavior. As a result, the Nix builder is made UTF-8 capable without any LOCALE_ARCHIVE or LANG hacks. --- pkgs/build-support/agda/default.nix | 6 ------ pkgs/build-support/release/maven-build.nix | 2 -- pkgs/development/haskell-modules/generic-builder.nix | 5 +---- pkgs/development/haskell-modules/make-package-set.nix | 4 ---- pkgs/development/interpreters/elixir/generic-builder.nix | 3 --- 5 files changed, 1 insertion(+), 19 deletions(-) diff --git a/pkgs/build-support/agda/default.nix b/pkgs/build-support/agda/default.nix index 16fe748c3e5c7..209dfd6785506 100644 --- a/pkgs/build-support/agda/default.nix +++ b/pkgs/build-support/agda/default.nix @@ -24,12 +24,6 @@ let self.buildDepends; buildDependsAgdaShareAgda = map (x: x + "/share/agda") self.buildDependsAgda; - # Not much choice here ;) - LANG = "en_US.UTF-8"; - LOCALE_ARCHIVE = stdenv.lib.optionalString - stdenv.isLinux - "${glibcLocales}/lib/locale/locale-archive"; - everythingFile = "Everything.agda"; propagatedBuildInputs = self.buildDependsAgda; diff --git a/pkgs/build-support/release/maven-build.nix b/pkgs/build-support/release/maven-build.nix index f7ea07baccbe1..4999ce86683e5 100644 --- a/pkgs/build-support/release/maven-build.nix +++ b/pkgs/build-support/release/maven-build.nix @@ -23,8 +23,6 @@ stdenv.mkDerivation ( rec { runHook preSetupPhase mkdir -p $out/nix-support - export LANG="en_US.UTF-8" - export LOCALE_ARCHIVE=$glibcLocales/lib/locale/locale-archive export M2_REPO=$TMPDIR/repository runHook postSetupPhase diff --git a/pkgs/development/haskell-modules/generic-builder.nix 
b/pkgs/development/haskell-modules/generic-builder.nix index 87d3b5ae496c1..6a750175cb439 100644 --- a/pkgs/development/haskell-modules/generic-builder.nix +++ b/pkgs/development/haskell-modules/generic-builder.nix @@ -6,7 +6,7 @@ let isCross = stdenv.buildPlatform != stdenv.hostPlatform; inherit (buildPackages) fetchurl removeReferencesTo - pkgconfig coreutils gnugrep gnused glibcLocales; + pkgconfig coreutils gnugrep gnused; in { pname @@ -262,8 +262,6 @@ stdenv.mkDerivation ({ buildInputs = otherBuildInputs ++ optionals (!isLibrary) propagatedBuildInputs; propagatedBuildInputs = optionals isLibrary propagatedBuildInputs; - LANG = "en_US.UTF-8"; # GHC needs the locale configured during the Haddock phase. - prePatch = optionalString (editedCabalFile != null) '' echo "Replace Cabal file with edited version from ${newCabalFileUrl}." cp ${newCabalFile} ${pname}.cabal @@ -513,6 +511,5 @@ stdenv.mkDerivation ({ // optionalAttrs (postFixup != "") { inherit postFixup; } // optionalAttrs (dontStrip) { inherit dontStrip; } // optionalAttrs (hardeningDisable != []) { inherit hardeningDisable; } -// optionalAttrs (stdenv.buildPlatform.libc == "glibc"){ LOCALE_ARCHIVE = "${glibcLocales}/lib/locale/locale-archive"; } ) ) diff --git a/pkgs/development/haskell-modules/make-package-set.nix b/pkgs/development/haskell-modules/make-package-set.nix index a4c040673487e..8fb12caf2f865 100644 --- a/pkgs/development/haskell-modules/make-package-set.nix +++ b/pkgs/development/haskell-modules/make-package-set.nix @@ -128,8 +128,6 @@ let preferLocalBuild = true; allowSubstitutes = false; phases = ["installPhase"]; - LANG = "en_US.UTF-8"; - LOCALE_ARCHIVE = pkgs.lib.optionalString (buildPlatform.libc == "glibc") "${buildPackages.glibcLocales}/lib/locale/locale-archive"; installPhase = '' export HOME="$TMP" mkdir -p "$out" @@ -311,8 +309,6 @@ in package-set { inherit pkgs stdenv callPackage; } self // { nativeBuildInputs = [ ghcEnv ] ++ nativeBuildInputs ++ mkDrvArgs.nativeBuildInputs or []; 
phases = ["installPhase"]; installPhase = "echo $nativeBuildInputs $buildInputs > $out"; - LANG = "en_US.UTF-8"; - LOCALE_ARCHIVE = pkgs.lib.optionalString (stdenv.hostPlatform.libc == "glibc") "${buildPackages.glibcLocales}/lib/locale/locale-archive"; "NIX_${ghcCommandCaps}" = "${ghcEnv}/bin/${ghcCommand}"; "NIX_${ghcCommandCaps}PKG" = "${ghcEnv}/bin/${ghcCommand}-pkg"; # TODO: is this still valid? diff --git a/pkgs/development/interpreters/elixir/generic-builder.nix b/pkgs/development/interpreters/elixir/generic-builder.nix index b7e98af21097f..5c1eae1ae2d41 100644 --- a/pkgs/development/interpreters/elixir/generic-builder.nix +++ b/pkgs/development/interpreters/elixir/generic-builder.nix @@ -22,9 +22,6 @@ in buildInputs = [ erlang rebar makeWrapper ]; - LANG = "C.UTF-8"; - LC_TYPE = "C.UTF-8"; - setupHook = ./setup-hook.sh; inherit debugInfo; From 0ebb6d22868ffdc58e52276dea0337f6a0a3b2ce Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Thu, 9 May 2019 16:57:10 -0400 Subject: [PATCH 3/5] darwin-stdenv: set LC_CTYPE to UTF-8 Unfortunately, the default macOS locale does not have UTF-8 support. You need to set this for correct behavior. --- pkgs/stdenv/darwin/default.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/pkgs/stdenv/darwin/default.nix b/pkgs/stdenv/darwin/default.nix index f7a40bb0d0e74..d95b97b916fb2 100644 --- a/pkgs/stdenv/darwin/default.nix +++ b/pkgs/stdenv/darwin/default.nix @@ -37,6 +37,7 @@ in rec { export CMAKE_OSX_ARCHITECTURES=x86_64 # Workaround for https://openradar.appspot.com/22671534 on 10.11. export gl_cv_func_getcwd_abort_bug=no + export LC_CTYPE=UTF-8 ''; bootstrapTools = derivation rec { From a5999f6ecdb7b204241f6a8e0625f817318fe3b3 Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Thu, 11 Jul 2019 11:50:02 -0400 Subject: [PATCH 4/5] gnulib: skip localename tests These break due to default locale not being what gnulib expects. 
--- pkgs/tools/misc/coreutils/default.nix | 5 +++++ pkgs/tools/misc/findutils/default.nix | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/pkgs/tools/misc/coreutils/default.nix b/pkgs/tools/misc/coreutils/default.nix index c80bb64d0bbd3..cbd958eb27e24 100644 --- a/pkgs/tools/misc/coreutils/default.nix +++ b/pkgs/tools/misc/coreutils/default.nix @@ -47,6 +47,11 @@ stdenv.mkDerivation rec { sed '2i print "Skipping env -S test"; exit 77;' -i ./tests/misc/env-S.pl + # Skip default locale tests. These appear to assume that + # setlocale(LC_ALL, "") = "C" when the default in Nixpkgs is + # "C.UTF-8" + echo "int main() { return 77; }" > gnulib-tests/test-localename.c + # these tests fail in the unprivileged nix sandbox (without nix-daemon) as we break posix assumptions for f in ./tests/chgrp/{basic.sh,recurse.sh,default-no-deref.sh,no-x.sh,posix-H.sh}; do sed '2i echo Skipping chgrp && exit 77' -i "$f" diff --git a/pkgs/tools/misc/findutils/default.nix b/pkgs/tools/misc/findutils/default.nix index 9db66480cb107..26baa106c63ee 100644 --- a/pkgs/tools/misc/findutils/default.nix +++ b/pkgs/tools/misc/findutils/default.nix @@ -19,6 +19,13 @@ stdenv.mkDerivation rec { ./disable-getdtablesize-test.patch ]; + postPatch = '' + # Skip default locale tests. These appear to assume that + # setlocale(LC_ALL, "") = "C" when the default in Nixpkgs is + # "C.UTF-8" + echo "int main() { return 77; }" > tests/test-localename.c + ''; + buildInputs = [ coreutils ]; # bin/updatedb script needs to call sort # Since glibc-2.25 the i686 tests hang reliably right after test-sleep. 
From 9b218d17dde22e2c0d9965ebf63af6c7b0507367 Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Wed, 17 Jul 2019 15:50:22 -0400 Subject: [PATCH 5/5] nixos: add release notes for glibc default locale --- nixos/doc/manual/release-notes/rl-1909.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/nixos/doc/manual/release-notes/rl-1909.xml b/nixos/doc/manual/release-notes/rl-1909.xml index 53f5b8bb73217..8ac2e9ab6f262 100644 --- a/nixos/doc/manual/release-notes/rl-1909.xml +++ b/nixos/doc/manual/release-notes/rl-1909.xml @@ -318,6 +318,18 @@ The mercurial httpd.extraSubservice has been removed from nixpkgs due to lack of maintainer. + + + The default locale used by Glibc is now + C.UTF-8 instead of C. + This means that Unicode works in Nix builders out of the box. + The previous behavior can be restored by setting + LANG="C". More information on these settings + is available in the Glibc manual at + 7.3 Locale Categories. + +