From c44d47dbe56464cb738b2cd2f70c4823374c632f Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Aug 2022 06:55:44 -0400 Subject: [PATCH 01/13] arrow-cpp: 8.0.0 -> 9.0.0 --- .../libraries/arrow-cpp/default.nix | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/pkgs/development/libraries/arrow-cpp/default.nix b/pkgs/development/libraries/arrow-cpp/default.nix index 64fc6a165c47c..16c94a6e59d3e 100644 --- a/pkgs/development/libraries/arrow-cpp/default.nix +++ b/pkgs/development/libraries/arrow-cpp/default.nix @@ -56,25 +56,25 @@ let arrow-testing = fetchFromGitHub { owner = "apache"; repo = "arrow-testing"; - rev = "634739c664433cec366b4b9a81d1e1044a8c5eda"; - hash = "sha256-r1WVgJJsI7v485L6Qb+5i7kFO4Tvxyk1T0JBb4og6pg="; + rev = "5bab2f264a23f5af68f69ea93d24ef1e8e77fc88"; + hash = "sha256-Pxx8ohUpXb5u1995IvXmxQMqWiDJ+7LAll/AjQP7ph8="; }; parquet-testing = fetchFromGitHub { owner = "apache"; repo = "parquet-testing"; - rev = "acd375eb86a81cd856476fca0f52ba6036a067ff"; - hash = "sha256-z/kmi+4dBO/dsVkJA4NgUoxl0pXi8RWIGvI8MGu/gcc="; + rev = "aafd3fc9df431c2625a514fb46626e5614f1d199"; + hash = "sha256-cO5t/mgsbBhbSefx8EMGTyxmgTjhZ8mFujkFQ3p/JS0="; }; in stdenv.mkDerivation rec { pname = "arrow-cpp"; - version = "8.0.0"; + version = "9.0.0"; src = fetchurl { url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz"; - hash = "sha256-rZoFcFEXyYnBFrrprHBJL+AVBQ4bgPsOOP3ktdhjqqM="; + hash = "sha256-qaAz8KNJAomZj0WGgNGVec8HkRcXumWv3my4AHD3qbU="; }; sourceRoot = "apache-arrow-${version}/cpp"; @@ -82,29 +82,29 @@ stdenv.mkDerivation rec { # https://github.com/apache/arrow/blob/apache-arrow-8.0.0/cpp/thirdparty/versions.txt ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl { - url = "https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2"; - hash = "sha256-NDMOXOJ2CZ4uiVDZM121qHVomkxqVnUe87HYxTf4h/Y="; + url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2"; + hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo="; }; ARROW_MIMALLOC_URL = fetchFromGitHub { owner = "microsoft"; repo = "mimalloc"; - rev = "v1.7.3"; - hash = "sha256-Ca877VitpWyKmZNHavqgewk/P+tyd2xHDNVqveKh87M="; + rev = "v2.0.6"; + hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc="; }; ARROW_XSIMD_URL = fetchFromGitHub { owner = "xtensor-stack"; repo = "xsimd"; - rev = "7d1778c3b38d63db7cec7145d939f40bc5d859d1"; - hash = "sha256-89AysBUVnTdWyMPazeJegnQ6WEH90Ns7qQInZLMSXY4="; + rev = "8.1.0"; + hash = "sha256-Aqs6XJkGjAjGAp0PprabSM4m+32M/UXpSHppCHdzaZk="; }; ARROW_SUBSTRAIT_URL = fetchFromGitHub { owner = "substrait-io"; repo = "substrait"; - rev = "e1b4c04a1b518912f4c4065b16a1b2c0ac8e14cf"; - hash = "sha256-56FSjDngsROSHLjMv+OYAIYqphEu3GzgIMHbgh/ZQw0="; + rev = "v0.6.0"; + hash = "sha256-hxCBomL4Qg9cHLRg9ZiO9k+JVOZXn6f4ikPtK+V9tno="; }; patches = [ @@ -159,7 +159,6 @@ stdenv.mkDerivation rec { ''; cmakeFlags = [ - "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON" "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}" "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}" "-DARROW_BUILD_TESTS=ON" @@ -168,7 +167,7 @@ stdenv.mkDerivation rec { "-DARROW_EXTRA_ERROR_CONTEXT=ON" "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON" "-DARROW_DEPENDENCY_SOURCE=SYSTEM" - "-DThrift_SOURCE=AUTO" # search for Thrift using pkg-config (ThriftConfig.cmake requires OpenSSL and libevent) + "-Dxsimd_SOURCE=AUTO" "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}" "-DARROW_COMPUTE=ON" "-DARROW_CSV=ON" From cf192f5db16c245f3cfe299957347c6b3565edb1 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Aug 2022 07:57:09 -0400 Subject: [PATCH 02/13] arrow-cpp: remove unused `jemalloc` input --- pkgs/development/libraries/arrow-cpp/default.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/pkgs/development/libraries/arrow-cpp/default.nix b/pkgs/development/libraries/arrow-cpp/default.nix index 16c94a6e59d3e..1bdf1a411e77e 100644 --- a/pkgs/development/libraries/arrow-cpp/default.nix +++ b/pkgs/development/libraries/arrow-cpp/default.nix @@ -18,7 +18,6 @@ , google-cloud-cpp , grpc , gtest -, jemalloc , libbacktrace , lz4 , minio From 56e9e52cc2b4cd0ac8512f69218c54069ec3fcdb Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Aug 2022 07:57:27 -0400 Subject: [PATCH 03/13] arrow-cpp: add commentary for vendored `jemalloc` and `mimalloc` --- pkgs/development/libraries/arrow-cpp/default.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkgs/development/libraries/arrow-cpp/default.nix b/pkgs/development/libraries/arrow-cpp/default.nix index 1bdf1a411e77e..12fa9ffb9a907 100644 --- a/pkgs/development/libraries/arrow-cpp/default.nix +++ b/pkgs/development/libraries/arrow-cpp/default.nix @@ -78,13 +78,16 @@ stdenv.mkDerivation rec { sourceRoot = "apache-arrow-${version}/cpp"; # versions are all taken from - # https://github.com/apache/arrow/blob/apache-arrow-8.0.0/cpp/thirdparty/versions.txt + # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt + # jemalloc: arrow uses a custom prefix to prevent default allocator symbol + # collisions as well as custom build flags ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl { url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2"; hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo="; }; + # mimalloc: arrow uses custom build flags for mimalloc ARROW_MIMALLOC_URL = fetchFromGitHub { owner = "microsoft"; repo = "mimalloc"; From 829e7c904dcca618c3ea4a385dd5a77eb0242f0f Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Aug 2022 08:37:01 -0400 Subject: [PATCH 04/13] python3Packages.pyarrow: get tests working with arrow-cpp 9.0.0 --- pkgs/development/python-modules/pyarrow/default.nix | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkgs/development/python-modules/pyarrow/default.nix b/pkgs/development/python-modules/pyarrow/default.nix index 53dde0cd0f616..83d8a19007daa 100644 --- a/pkgs/development/python-modules/pyarrow/default.nix +++ b/pkgs/development/python-modules/pyarrow/default.nix @@ -111,8 +111,10 @@ buildPythonPackage rec { preCheck = '' shopt -s extglob - rm -r pyarrow/!(tests) - '' + lib.optionalString stdenv.isDarwin '' + rm -r pyarrow/!(conftest.py|tests) + mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py + substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest + '' + lib.optionalString stdenv.isDarwin '' # OSError: [Errno 24] Too many open files ulimit -n 1024 ''; From 941d9e9b3163286d0f910d1a4b88ded53c172d8a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Aug 2022 09:32:54 -0400 Subject: [PATCH 05/13] python3Packages.db-dtypes: bump pyarrow upper bound --- pkgs/development/python-modules/db-dtypes/default.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkgs/development/python-modules/db-dtypes/default.nix b/pkgs/development/python-modules/db-dtypes/default.nix index 69aad6af3f2e6..d8f5913acc63b 100644 --- a/pkgs/development/python-modules/db-dtypes/default.nix +++ b/pkgs/development/python-modules/db-dtypes/default.nix @@ -23,6 +23,11 @@ buildPythonPackage rec { hash = "sha256-LLKhYLzGUQRx4ciWv1TilYvTOO0sj6rdkPlJLPZ8VXA="; }; + postPatch = '' + substituteInPlace setup.py \ + --replace 'pyarrow>=3.0.0, <9.0dev' 'pyarrow>=3.0.0, <10.0dev' + ''; + propagatedBuildInputs = [ numpy packaging From 4b664ed4e3b94adb299c3e3b392b8dfb42eedece Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 4 Aug 2022 12:41:55 -0400 Subject: [PATCH 06/13] python3Packages.apache-beam: remove numpy constraint --- pkgs/development/python-modules/apache-beam/default.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkgs/development/python-modules/apache-beam/default.nix b/pkgs/development/python-modules/apache-beam/default.nix index 19e2cc827b94b..09385f3c37b6d 100644 --- a/pkgs/development/python-modules/apache-beam/default.nix +++ b/pkgs/development/python-modules/apache-beam/default.nix @@ -55,7 +55,8 @@ buildPythonPackage rec { postPatch = '' substituteInPlace setup.py \ --replace "dill>=0.3.1.1,<0.3.2" "dill" \ - --replace "pyarrow>=0.15.1,<8.0.0" "pyarrow" + --replace "pyarrow>=0.15.1,<8.0.0" "pyarrow" \ + --replace "numpy>=1.14.3,<1.23.0" "numpy" ''; sourceRoot = "source/sdks/python"; From bf3489173acbda5ac1113231d616e5b1af7551d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Mancilla?= Date: Sat, 6 Aug 2022 02:11:07 -0400 Subject: [PATCH 07/13] arrow-cpp: fix failing tests on darwin sandbox --- pkgs/development/libraries/arrow-cpp/default.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/pkgs/development/libraries/arrow-cpp/default.nix b/pkgs/development/libraries/arrow-cpp/default.nix index 12fa9ffb9a907..d3b736fbe0f0d 100644 --- a/pkgs/development/libraries/arrow-cpp/default.nix +++ b/pkgs/development/libraries/arrow-cpp/default.nix @@ -230,6 +230,7 @@ stdenv.mkDerivation rec { ]; in lib.optionalString doInstallCheck "-${builtins.concatStringsSep ":" filteredTests}"; + __darwinAllowLocalNetworking = true; installCheckInputs = [ perl which sqlite ] ++ lib.optional enableS3 minio; installCheckPhase = let From 574b3635a8a64dd01fbb6e209fca490157840bff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Mancilla?= Date: Sat, 6 Aug 2022 02:11:34 -0400 Subject: [PATCH 08/13] python3Packages.pyarrow: fix failing tests on darwin sandbox --- pkgs/development/python-modules/pyarrow/default.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkgs/development/python-modules/pyarrow/default.nix b/pkgs/development/python-modules/pyarrow/default.nix index 83d8a19007daa..bdd907176f99f 100644 --- a/pkgs/development/python-modules/pyarrow/default.nix +++ b/pkgs/development/python-modules/pyarrow/default.nix @@ -79,6 +79,8 @@ buildPythonPackage rec { dontUseCmakeConfigure = true; + __darwinAllowLocalNetworking = true; + preBuild = '' export PYARROW_PARALLEL=$NIX_BUILD_CORES ''; From 8b40f591614e5e3ffa9d880efe5eb732e0b69031 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 6 Aug 2022 06:48:34 -0400 Subject: [PATCH 09/13] python3Packages.google-cloud-bigquery: bump pyarrow constraint --- .../python-modules/google-cloud-bigquery/default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/development/python-modules/google-cloud-bigquery/default.nix b/pkgs/development/python-modules/google-cloud-bigquery/default.nix index 27db85ab488c1..68410670b04dc 100644 --- a/pkgs/development/python-modules/google-cloud-bigquery/default.nix +++ b/pkgs/development/python-modules/google-cloud-bigquery/default.nix @@ -31,7 +31,7 @@ buildPythonPackage rec { postPatch = '' substituteInPlace setup.py \ - --replace 'pyarrow >= 3.0.0, < 8.0dev' 'pyarrow >= 3.0.0, < 9.0dev' + --replace 'pyarrow >= 3.0.0, < 8.0dev' 'pyarrow >= 3.0.0, < 10.0dev' ''; propagatedBuildInputs = [ From e41f448fcc1a0d0b083c8e15932b1059e9f47488 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sun, 7 Aug 2022 06:27:19 -0400 Subject: [PATCH 10/13] python3Packages.google-cloud-bigquery: ignore network-based test --- .../python-modules/google-cloud-bigquery/default.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkgs/development/python-modules/google-cloud-bigquery/default.nix b/pkgs/development/python-modules/google-cloud-bigquery/default.nix index 68410670b04dc..da01f970df32e 100644 --- a/pkgs/development/python-modules/google-cloud-bigquery/default.nix +++ b/pkgs/development/python-modules/google-cloud-bigquery/default.nix @@ -31,7 +31,7 @@ buildPythonPackage rec { postPatch = '' substituteInPlace setup.py \ - --replace 'pyarrow >= 3.0.0, < 8.0dev' 'pyarrow >= 3.0.0, < 10.0dev' + --replace 'pyarrow >= 3.0.0, < 9.0dev' 'pyarrow >= 3.0.0, < 10.0dev' ''; propagatedBuildInputs = [ @@ -84,6 +84,7 @@ buildPythonPackage rec { "test__initiate_resumable_upload" "test__initiate_resumable_upload_mtls" "test__initiate_resumable_upload_with_retry" + "test_table_clones" ]; disabledTestPaths = [ From d9202ab62548a564beec192ab1cb4f92b010be9b Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 8 Aug 2022 05:30:37 -0400 Subject: [PATCH 11/13] python3Packages.apache-beam: unrestrict `pymongo` dependency --- pkgs/development/python-modules/apache-beam/default.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkgs/development/python-modules/apache-beam/default.nix b/pkgs/development/python-modules/apache-beam/default.nix index 09385f3c37b6d..541e5005167d4 100644 --- a/pkgs/development/python-modules/apache-beam/default.nix +++ b/pkgs/development/python-modules/apache-beam/default.nix @@ -56,7 +56,8 @@ buildPythonPackage rec { substituteInPlace setup.py \ --replace "dill>=0.3.1.1,<0.3.2" "dill" \ --replace "pyarrow>=0.15.1,<8.0.0" "pyarrow" \ - --replace "numpy>=1.14.3,<1.23.0" "numpy" + --replace "numpy>=1.14.3,<1.23.0" "numpy" \ + --replace "pymongo>=3.8.0,<4.0.0" "pymongo" ''; sourceRoot = "source/sdks/python"; From 21237eedfea249c34aee2277da56420b6d058d11 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 8 Aug 2022 05:31:07 -0400 Subject: [PATCH 12/13] python3Packages.apache-beam: set `enableParallelBuilding` to `true` --- pkgs/development/python-modules/apache-beam/default.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkgs/development/python-modules/apache-beam/default.nix b/pkgs/development/python-modules/apache-beam/default.nix index 541e5005167d4..508d6670c6d20 100644 --- a/pkgs/development/python-modules/apache-beam/default.nix +++ b/pkgs/development/python-modules/apache-beam/default.nix @@ -92,6 +92,8 @@ buildPythonPackage rec { typing-extensions ]; + enableParallelBuilding = true; + pythonImportsCheck = [ "apache_beam" ]; From 7d79dd8e52e8c2113dd6129be17fd6300fb56fd1 Mon Sep 17 00:00:00 2001 From: Martin Weinelt Date: Thu, 11 Aug 2022 01:19:05 +0200 Subject: [PATCH 13/13] python3Packages.dask-gateway: fix build --- .../python-modules/dask-gateway/default.nix | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pkgs/development/python-modules/dask-gateway/default.nix b/pkgs/development/python-modules/dask-gateway/default.nix index 5f2634a174809..f0e5a54c95bf0 100644 --- a/pkgs/development/python-modules/dask-gateway/default.nix +++ b/pkgs/development/python-modules/dask-gateway/default.nix @@ -1,6 +1,6 @@ { lib , buildPythonPackage -, fetchPypi +, fetchFromGitHub , aiohttp , dask , distributed @@ -10,12 +10,17 @@ buildPythonPackage rec { pname = "dask-gateway"; # update dask-gateway lock step with dask-gateway-server version = "2022.6.1"; + format = "pyproject"; - src = fetchPypi { - inherit pname version; - sha256 = "sha256-i0OFXjvDg+D4Sdyg6rluP0k6/Ecr+VZn+RiIEQONQX0="; + src = fetchFromGitHub { + owner = "dask"; + repo = "dask-gateway"; + rev = "refs/tags/${version}"; + hash = "sha256-PsagZdEPpeuZH9hFL98xB5z6zOdd4Cx/skGQ0eOYkCA="; }; + sourceRoot = "source/dask-gateway"; + propagatedBuildInputs = [ aiohttp dask @@ -31,6 +36,6 @@ buildPythonPackage rec { description = "A client library for interacting with a dask-gateway server"; homepage = "https://gateway.dask.org/"; license = licenses.bsd3; - maintainers = [ maintainers.costrouc ]; + maintainers = with maintainers; [ costrouc ]; }; }