diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-foss-2021a-CUDA-11.3.1.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-foss-2021a-CUDA-11.3.1.eb index 3dedead0354d..33304aa7d22f 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-foss-2021a-CUDA-11.3.1.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-foss-2021a-CUDA-11.3.1.eb @@ -8,15 +8,9 @@ PyTorch is a deep learning framework that puts Python first.""" toolchain = {'name': 'foss', 'version': '2021a'} -sources = [{ - 'filename': '%(name)s-%(version)s.tar.gz', - 'git_config': { - 'url': 'https://github.com/pytorch', - 'repo_name': 'pytorch', - 'tag': 'v%(version)s', - 'recursive': True, - }, -}] +source_urls = [GITHUB_RELEASE] +sources = ['%(namelower)s-v%(version)s.tar.gz'] + patches = [ 'PyTorch-1.7.0_avoid-nan-in-test-torch.patch', 'PyTorch-1.7.0_disable-dev-shm-test.patch', @@ -24,18 +18,27 @@ patches = [ 'PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch', 'PyTorch-1.8.1_increase-distributed-test-timeout.patch', 'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch', - 'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch', + 'PyTorch-1.10.0_add-dummy-bfloat16-for-ppc.patch', 'PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch', - 'PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch', + 'PyTorch-1.10.0_fix-faulty-asserts.patch', + 'PyTorch-1.10.0_fix-fp16-quantization-without-fbgemm.patch', + 'PyTorch-1.10.0_fix-kineto-crash.patch', + 'PyTorch-1.10.0_fix-missing-VSX-dispatch.patch', 'PyTorch-1.10.0_fix-test-cond-cpu.patch', + 'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch', + 'PyTorch-1.10.0_fix-test-model_dump.patch', 'PyTorch-1.10.0_fix-vnni-detection.patch', + 'PyTorch-1.10.0_fix-vsx-vector-functions.patch', + 'PyTorch-1.10.0_fix-XNNPACK-tests.patch', 'PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch', + 'PyTorch-1.10.0_remove-vec-dump-functions.patch', + 'PyTorch-1.10.0_skip_cmake_rpath.patch', 'PyTorch-1.10.0_skip_failing_ops_tests.patch', 'PyTorch-1.10.0_skip_nan_tests_openblas.patch', - 'PyTorch-1.10.0_skip_cmake_rpath.patch', + 'PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch', ] checksums = [ - None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone' + '1970225700d1f9791eb553855ad3c22e7c9fd69cff2765d532506b33e449831e', # pytorch-v1.10.0.tar.gz 'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch # PyTorch-1.7.1_correctly-pass-jit_opt_level.patch @@ -45,24 +48,42 @@ checksums = [ '7a6e512274f0b8673f4f207a5bc53387d88be7e79833f42d20365668b2118071', # PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch 'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea', - # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch - '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707', + # PyTorch-1.10.0_add-dummy-bfloat16-for-ppc.patch + '0c8bf4b284c5c2cb4d175645d44bed674cca5218de741a8ea698cef2884a86da', # PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch '426c9ead1a74b656748d4c8bf8afd4303d8b9f2394ad22b21a845d07c8ca1d12', - # PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch - '67152215e4530a9b1d7349fb20864445fd815288f04ab9e96e45c73b2d87827a', + # PyTorch-1.10.0_fix-faulty-asserts.patch + '44d2bb998ec3b3b65659f3327894745888a58f957a191cb524b4003018db51a2', + # PyTorch-1.10.0_fix-fp16-quantization-without-fbgemm.patch + 'dfc09c2e787e757191a0ebf68ace46e2a90d71263ba84e2ee764b596d2ee6769', + # PyTorch-1.10.0_fix-kineto-crash.patch + 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb', + # PyTorch-1.10.0_fix-missing-VSX-dispatch.patch + '29368e69e98cb16659a2043b7a0df0411c99bc57a70405f8b5300f68c25f3ee9', # PyTorch-1.10.0_fix-test-cond-cpu.patch '51f83f5d5ef69656ef35b73f17e0671e70113798421be11ea4c7b56ffcc4da03', + # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch + '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707', + # PyTorch-1.10.0_fix-test-model_dump.patch + '339148ae1a028cda6e750ac93fa38a599f66c7abe26586c9219f1a206ea14557', # PyTorch-1.10.0_fix-vnni-detection.patch '1f3664c0febfa2a3fc4c0cd3bae185f289716ac0b6c3d7e8fa1cee19ba62b7cc', + # PyTorch-1.10.0_fix-vsx-vector-functions.patch + '7bef5f96cb83b2d655d2f76dd7468a171d446f0b3e06da2232ec7f886484d312', + # PyTorch-1.10.0_fix-XNNPACK-tests.patch + 'd3e749a2a42efce463e3b8a1aebb21f0edf2256682c4417297d9a44a6210e5f8', # PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch 'e65afb01786f7f030ccb5faada1eb474bb0c418bcadcf1baaa71a4fa2f3f4240', + # PyTorch-1.10.0_remove-vec-dump-functions.patch + '5e796c83f74e77aa193fef2720a7a41e3e0eb39df726a6f0a2a1c55d37d8fcc8', + # PyTorch-1.10.0_skip_cmake_rpath.patch + 'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', # PyTorch-1.10.0_skip_failing_ops_tests.patch '399af94ffcef4a6db5226552c46f11e9b0f0f371b2d7924b9e5764d2281581ab', # PyTorch-1.10.0_skip_nan_tests_openblas.patch '7d3f83e3056d9e47a460790313238f28708beb596cafaa7ae55e374d368bbedf', - # PyTorch-1.10.0_skip_cmake_rpath.patch - 'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', + # PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch + '34ba476a7bcddec323bf9eca083cb4623d0f569d081aa3add3769c24f22849d2', ] osdependencies = [OS_PKG_IBVERBS_DEV] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-foss-2021a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-foss-2021a.eb index c05632ab43ff..208a35c065af 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-foss-2021a.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-foss-2021a.eb @@ -7,32 +7,35 @@ PyTorch is a deep learning framework that puts Python first.""" toolchain = {'name': 'foss', 'version': '2021a'} -sources = [{ - 'filename': '%(name)s-%(version)s.tar.gz', - 'git_config': { - 'url': 'https://github.com/pytorch', - 'repo_name': 'pytorch', - 'tag': 'v%(version)s', - 'recursive': True, - }, -}] +source_urls = [GITHUB_RELEASE] +sources = ['%(namelower)s-v%(version)s.tar.gz'] + patches = [ 'PyTorch-1.7.0_avoid-nan-in-test-torch.patch', 'PyTorch-1.7.0_disable-dev-shm-test.patch', 'PyTorch-1.7.1_correctly-pass-jit_opt_level.patch', 'PyTorch-1.8.1_increase-distributed-test-timeout.patch', 'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch', - 'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch', + 'PyTorch-1.10.0_add-dummy-bfloat16-for-ppc.patch', 'PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch', - 'PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch', + 'PyTorch-1.10.0_fix-faulty-asserts.patch', + 'PyTorch-1.10.0_fix-fp16-quantization-without-fbgemm.patch', + 'PyTorch-1.10.0_fix-kineto-crash.patch', + 'PyTorch-1.10.0_fix-missing-VSX-dispatch.patch', 'PyTorch-1.10.0_fix-test-cond-cpu.patch', + 'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch', + 'PyTorch-1.10.0_fix-test-model_dump.patch', 'PyTorch-1.10.0_fix-vnni-detection.patch', + 'PyTorch-1.10.0_fix-vsx-vector-functions.patch', + 'PyTorch-1.10.0_fix-XNNPACK-tests.patch', 'PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch', + 'PyTorch-1.10.0_remove-vec-dump-functions.patch', 'PyTorch-1.10.0_skip_failing_ops_tests.patch', 'PyTorch-1.10.0_skip_nan_tests_openblas.patch', + 'PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch', ] checksums = [ - None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone' + '1970225700d1f9791eb553855ad3c22e7c9fd69cff2765d532506b33e449831e', # pytorch-v1.10.0.tar.gz 'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch # PyTorch-1.7.1_correctly-pass-jit_opt_level.patch @@ -41,22 +44,40 @@ checksums = [ '7a6e512274f0b8673f4f207a5bc53387d88be7e79833f42d20365668b2118071', # PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch 'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea', - # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch - '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707', + # PyTorch-1.10.0_add-dummy-bfloat16-for-ppc.patch + '0c8bf4b284c5c2cb4d175645d44bed674cca5218de741a8ea698cef2884a86da', # PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch '426c9ead1a74b656748d4c8bf8afd4303d8b9f2394ad22b21a845d07c8ca1d12', - # PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch - '67152215e4530a9b1d7349fb20864445fd815288f04ab9e96e45c73b2d87827a', + # PyTorch-1.10.0_fix-faulty-asserts.patch + '44d2bb998ec3b3b65659f3327894745888a58f957a191cb524b4003018db51a2', + # PyTorch-1.10.0_fix-fp16-quantization-without-fbgemm.patch + 'dfc09c2e787e757191a0ebf68ace46e2a90d71263ba84e2ee764b596d2ee6769', + # PyTorch-1.10.0_fix-kineto-crash.patch + 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb', + # PyTorch-1.10.0_fix-missing-VSX-dispatch.patch + '29368e69e98cb16659a2043b7a0df0411c99bc57a70405f8b5300f68c25f3ee9', # PyTorch-1.10.0_fix-test-cond-cpu.patch '51f83f5d5ef69656ef35b73f17e0671e70113798421be11ea4c7b56ffcc4da03', + # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch + '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707', + # PyTorch-1.10.0_fix-test-model_dump.patch + '339148ae1a028cda6e750ac93fa38a599f66c7abe26586c9219f1a206ea14557', # PyTorch-1.10.0_fix-vnni-detection.patch '1f3664c0febfa2a3fc4c0cd3bae185f289716ac0b6c3d7e8fa1cee19ba62b7cc', + # PyTorch-1.10.0_fix-vsx-vector-functions.patch + '7bef5f96cb83b2d655d2f76dd7468a171d446f0b3e06da2232ec7f886484d312', + # PyTorch-1.10.0_fix-XNNPACK-tests.patch + 'd3e749a2a42efce463e3b8a1aebb21f0edf2256682c4417297d9a44a6210e5f8', # PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch 'e65afb01786f7f030ccb5faada1eb474bb0c418bcadcf1baaa71a4fa2f3f4240', + # PyTorch-1.10.0_remove-vec-dump-functions.patch + '5e796c83f74e77aa193fef2720a7a41e3e0eb39df726a6f0a2a1c55d37d8fcc8', # PyTorch-1.10.0_skip_failing_ops_tests.patch '399af94ffcef4a6db5226552c46f11e9b0f0f371b2d7924b9e5764d2281581ab', # PyTorch-1.10.0_skip_nan_tests_openblas.patch '7d3f83e3056d9e47a460790313238f28708beb596cafaa7ae55e374d368bbedf', + # PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch + '34ba476a7bcddec323bf9eca083cb4623d0f569d081aa3add3769c24f22849d2', ] osdependencies = [OS_PKG_IBVERBS_DEV] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-fosscuda-2020b.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-fosscuda-2020b.eb index 938b94c4f938..ffbf98f586c5 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-fosscuda-2020b.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-fosscuda-2020b.eb @@ -7,15 +7,9 @@ PyTorch is a deep learning framework that puts Python first.""" toolchain = {'name': 'fosscuda', 'version': '2020b'} -sources = [{ - 'filename': '%(name)s-%(version)s.tar.gz', - 'git_config': { - 'url': 'https://github.com/pytorch', - 'repo_name': 'pytorch', - 'tag': 'v%(version)s', - 'recursive': True, - }, -}] +source_urls = [GITHUB_RELEASE] +sources = ['%(namelower)s-v%(version)s.tar.gz'] + patches = [ 'PyTorch-1.7.0_avoid-nan-in-test-torch.patch', 'PyTorch-1.7.0_disable-dev-shm-test.patch', @@ -23,18 +17,27 @@ patches = [ 'PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch', 'PyTorch-1.8.1_increase-distributed-test-timeout.patch', 'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch', - 'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch', + 'PyTorch-1.10.0_add-dummy-bfloat16-for-ppc.patch', 'PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch', - 'PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch', + 'PyTorch-1.10.0_fix-faulty-asserts.patch', + 'PyTorch-1.10.0_fix-fp16-quantization-without-fbgemm.patch', + 'PyTorch-1.10.0_fix-kineto-crash.patch', + 'PyTorch-1.10.0_fix-missing-VSX-dispatch.patch', 'PyTorch-1.10.0_fix-test-cond-cpu.patch', + 'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch', + 'PyTorch-1.10.0_fix-test-model_dump.patch', 'PyTorch-1.10.0_fix-vnni-detection.patch', + 'PyTorch-1.10.0_fix-vsx-vector-functions.patch', + 'PyTorch-1.10.0_fix-XNNPACK-tests.patch', 'PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch', + 'PyTorch-1.10.0_remove-vec-dump-functions.patch', + 'PyTorch-1.10.0_skip_cmake_rpath.patch', 'PyTorch-1.10.0_skip_failing_ops_tests.patch', 'PyTorch-1.10.0_skip_nan_tests_openblas.patch', - 'PyTorch-1.10.0_skip_cmake_rpath.patch', + 'PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch', ] checksums = [ - None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone' + '1970225700d1f9791eb553855ad3c22e7c9fd69cff2765d532506b33e449831e', # pytorch-v1.10.0.tar.gz 'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch # PyTorch-1.7.1_correctly-pass-jit_opt_level.patch @@ -44,24 +47,42 @@ checksums = [ '7a6e512274f0b8673f4f207a5bc53387d88be7e79833f42d20365668b2118071', # PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch 'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea', - # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch - '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707', + # PyTorch-1.10.0_add-dummy-bfloat16-for-ppc.patch + '0c8bf4b284c5c2cb4d175645d44bed674cca5218de741a8ea698cef2884a86da', # PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch '426c9ead1a74b656748d4c8bf8afd4303d8b9f2394ad22b21a845d07c8ca1d12', - # PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch - '67152215e4530a9b1d7349fb20864445fd815288f04ab9e96e45c73b2d87827a', + # PyTorch-1.10.0_fix-faulty-asserts.patch + '44d2bb998ec3b3b65659f3327894745888a58f957a191cb524b4003018db51a2', + # PyTorch-1.10.0_fix-fp16-quantization-without-fbgemm.patch + 'dfc09c2e787e757191a0ebf68ace46e2a90d71263ba84e2ee764b596d2ee6769', + # PyTorch-1.10.0_fix-kineto-crash.patch + 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb', + # PyTorch-1.10.0_fix-missing-VSX-dispatch.patch + '29368e69e98cb16659a2043b7a0df0411c99bc57a70405f8b5300f68c25f3ee9', # PyTorch-1.10.0_fix-test-cond-cpu.patch '51f83f5d5ef69656ef35b73f17e0671e70113798421be11ea4c7b56ffcc4da03', + # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch + '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707', + # PyTorch-1.10.0_fix-test-model_dump.patch + '339148ae1a028cda6e750ac93fa38a599f66c7abe26586c9219f1a206ea14557', # PyTorch-1.10.0_fix-vnni-detection.patch '1f3664c0febfa2a3fc4c0cd3bae185f289716ac0b6c3d7e8fa1cee19ba62b7cc', + # PyTorch-1.10.0_fix-vsx-vector-functions.patch + '7bef5f96cb83b2d655d2f76dd7468a171d446f0b3e06da2232ec7f886484d312', + # PyTorch-1.10.0_fix-XNNPACK-tests.patch + 'd3e749a2a42efce463e3b8a1aebb21f0edf2256682c4417297d9a44a6210e5f8', # PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch 'e65afb01786f7f030ccb5faada1eb474bb0c418bcadcf1baaa71a4fa2f3f4240', + # PyTorch-1.10.0_remove-vec-dump-functions.patch + '5e796c83f74e77aa193fef2720a7a41e3e0eb39df726a6f0a2a1c55d37d8fcc8', + # PyTorch-1.10.0_skip_cmake_rpath.patch + 'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', # PyTorch-1.10.0_skip_failing_ops_tests.patch '399af94ffcef4a6db5226552c46f11e9b0f0f371b2d7924b9e5764d2281581ab', # PyTorch-1.10.0_skip_nan_tests_openblas.patch '7d3f83e3056d9e47a460790313238f28708beb596cafaa7ae55e374d368bbedf', - # PyTorch-1.10.0_skip_cmake_rpath.patch - 'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', + # PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch + '34ba476a7bcddec323bf9eca083cb4623d0f569d081aa3add3769c24f22849d2', ] osdependencies = [OS_PKG_IBVERBS_DEV] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_add-dummy-bfloat16-for-ppc.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_add-dummy-bfloat16-for-ppc.patch new file mode 100644 index 000000000000..8e74d538e621 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_add-dummy-bfloat16-for-ppc.patch @@ -0,0 +1,116 @@ +Fix missing `vec_scalar_t` on PPC during compilation. + +From https://github.com/pytorch/pytorch/pull/67331 + +From: Nikita Shulga +Date: Wed, 27 Oct 2021 08:04:08 -0700 +Subject: [PATCH] Add dummy bfloat16 VSX implementation + +Just a copy of DEFAULT bfloat16 implementation and revert restriction +introduced by https://github.com/pytorch/pytorch/pull/61630 + +Fixes https://github.com/pytorch/pytorch/issues/66867 and https://github.com/pytorch/pytorch/issues/62016 +--- + aten/src/ATen/cpu/vec/functional.h | 2 - + .../cpu/vec/vec256/vsx/vec256_bfloat16_vsx.h | 54 +++++++++++++++++++ + .../cpu/vec/vec256/vsx/vec256_common_vsx.h | 6 +++ + 3 files changed, 60 insertions(+), 2 deletions(-) + create mode 100644 aten/src/ATen/cpu/vec/vec256/vsx/vec256_bfloat16_vsx.h + +diff --git a/aten/src/ATen/cpu/vec/functional.h b/aten/src/ATen/cpu/vec/functional.h +index 210ae9e9e883..388b3170d5b5 100644 +--- a/aten/src/ATen/cpu/vec/functional.h ++++ b/aten/src/ATen/cpu/vec/functional.h +@@ -1,6 +1,4 @@ + #pragma once + + #include +-#if !defined(__VSX__) || !defined(CPU_CAPABILITY_VSX) + #include +-#endif +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_bfloat16_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_bfloat16_vsx.h +new file mode 100644 +index 000000000000..ee8ca997a20e +--- /dev/null ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_bfloat16_vsx.h +@@ -0,0 +1,54 @@ ++#pragma once ++ ++#include ++#include ++#include ++namespace at { ++namespace vec { ++// See Note [Acceptable use of anonymous namespace in header] ++namespace { ++ ++inline std::tuple, Vectorized> convert_bfloat16_float( ++ const Vectorized& a) { ++ constexpr int64_t K = Vectorized::size(); ++ __at_align__ float arr[K]; ++ __at_align__ BFloat16 arr2[K]; ++ a.store(arr2); ++ convert(arr2, arr, K); ++ return std::make_tuple( ++ Vectorized::loadu(arr), ++ Vectorized::loadu(arr + Vectorized::size())); ++} ++ ++inline Vectorized convert_float_bfloat16( ++ const Vectorized& a, ++ const Vectorized& b) { ++ constexpr int64_t K = Vectorized::size(); ++ __at_align__ float arr[K]; ++ __at_align__ BFloat16 arr2[K]; ++ a.store(arr); ++ b.store(arr + Vectorized::size()); ++ convert(arr, arr2, K); ++ return Vectorized::loadu(arr2); ++} ++ ++void load_fp32_from_bf16(const c10::BFloat16* data, Vectorized& out) { ++ __at_align__ float values[Vectorized::size()]; ++ for (int k = 0; k < Vectorized::size(); ++k) { ++ values[k] = data[k]; ++ } ++ out = Vectorized::loadu(values); ++} ++ ++C10_UNUSED void load_fp32_from_bf16( ++ const c10::BFloat16* data, ++ Vectorized& out1, ++ Vectorized& out2) { ++ load_fp32_from_bf16(data, out1); ++ data += Vectorized::size(); ++ load_fp32_from_bf16(data, out2); ++} ++ ++} // namespace ++} // namespace vec ++} // namespace at +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_common_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_common_vsx.h +index 3d798a7f6268..b06f6a4bd1b5 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_common_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_common_vsx.h +@@ -3,6 +3,8 @@ + #include + #include + #include ++ ++// Note: header order is important here + #include + #include + #include +@@ -11,8 +13,12 @@ + #include + #include + #include ++ + #include + #include ++ ++#include ++ + namespace at { + namespace vec { + diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-faulty-asserts.patch similarity index 88% rename from easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch rename to easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-faulty-asserts.patch index eab07090d2ab..1ba71564df77 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-faulty-asserts.patch @@ -1,6 +1,6 @@ From: Alexander Grund Date: Tue, 18 May 2021 15:08:41 +0200 -Subject: [PATCH 1/2] Fix usage of TORCH_INTERNAL_ASSERT with message +Subject: Fix usage of TORCH_INTERNAL_ASSERT with message Using only a string as the argument for TORCH_INTERNAL_ASSERT will never trigger a failure as a string is always a truethy value. @@ -8,11 +8,6 @@ This hides actual bugs and makes users and devs think all worked while it did not. Change to use TORCH_INTERNAL_ASSERT(false, "msg") -Subject: [PATCH 2/2] Add missing skip decorator for -test_preserve_bundled_inputs_methods - -This test uses optimize_for_mobile which requires NNPACK to work - diff --git a/aten/src/ATen/native/BinaryOps.cpp b/aten/src/ATen/native/BinaryOps.cpp index c4edadb03e..e889cd03a8 100644 --- a/aten/src/ATen/native/BinaryOps.cpp @@ -83,20 +78,6 @@ index 6de646acfe..66341c959d 100644 } for (int i = num_output_channels; i < num_output_channels_padded; ++i) { weight_scales_data[i] = 1.f; -diff --git a/test/test_mobile_optimizer.py b/test/test_mobile_optimizer.py -index 11ef019a26..7b5ac1a239 100644 ---- a/test/test_mobile_optimizer.py -+++ b/test/test_mobile_optimizer.py -@@ -269,6 +269,9 @@ class TestOptimizer(TestCase): - bi_module_lint_list = generate_mobile_module_lints(bi_module) - self.assertEqual(len(bi_module_lint_list), 0) - -+ @unittest.skipUnless(torch.backends.xnnpack.enabled, -+ " XNNPACK must be enabled for these tests." -+ " Please build with USE_XNNPACK=1.") - def test_preserve_bundled_inputs_methods(self): - class MyBundledInputModule(torch.nn.Module): - def __init__(self): diff --git a/torch/csrc/jit/api/module.cpp b/torch/csrc/jit/api/module.cpp index 38592b80b9..8f9508321b 100644 --- a/torch/csrc/jit/api/module.cpp diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-fp16-quantization-without-fbgemm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-fp16-quantization-without-fbgemm.patch new file mode 100644 index 000000000000..b62f757853f1 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-fp16-quantization-without-fbgemm.patch @@ -0,0 +1,43 @@ +From https://github.com/pytorch/pytorch/pull/84750 + +From d5480f56374e7612d4400e845b05bc3a4507d87c Mon Sep 17 00:00:00 2001 +From: Alexander Grund +Date: Fri, 9 Sep 2022 14:39:49 +0200 +Subject: [PATCH] Fix Use-after-Free in qembeddingbag_byte_prepack_out + +When FBGEMM is not used (either manually disabled or on platforms such +as POWER where it isn't supported at all) the fallback code requests a +`data_ptr` on a `Tensor` object returned by +`to(ScalarType::Float)` in the same line. This object will be destroyed +at the end of the line leading to a dangling pointer. + +On some platforms this manifests in wrong results being returned as the +memory gets overwritten. + +Fix this by binding the temporary object (or initial object) to a const +value reference which extents its lifetime and getting the `data_ptr` +from that. + +Fixes #84748 +--- + .../ATen/native/quantized/cpu/qembeddingbag_prepack.cpp | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp b/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp +index 614e274b54..6c04d0ae12 100644 +--- a/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp ++++ b/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp +@@ -242,9 +242,10 @@ Tensor& qembeddingbag_byte_prepack_out(Tensor& output, const Tensor& weight) { + } + + #else +- const auto weight_data = weight_contig->scalar_type() == at::ScalarType::Half +- ? weight_contig->to(at::ScalarType::Float).data_ptr() +- : weight_contig->data_ptr(); ++ const Tensor& float_weight = weight_contig->scalar_type() == at::ScalarType::Half ++ ? weight_contig->to(at::ScalarType::Float) ++ : *weight_contig; ++ const auto weight_data = float_weight.data_ptr(); + constexpr float kEpsilon = 1e-8f; + for (auto row: c10::irange(embedding_rows)) { + const float* input_row = weight_data + row * embedding_cols; diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-missing-VSX-dispatch.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-missing-VSX-dispatch.patch new file mode 100644 index 000000000000..dcc0eac4bf06 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_fix-missing-VSX-dispatch.patch @@ -0,0 +1,26 @@ + +Fix for following error on PPC: +> undefined reference to `at::native::DispatchStub, c10::ArrayRef, c10::ArrayRef, void const*, c10::ArrayRef, void*), at::native::fft_fill_with_conjugate_symmetry_stub>::VSX' + +From https://github.com/pytorch/pytorch/pull/68914 + +From: Deepali Chourasia +Date: Thu, 25 Nov 2021 12:29:50 +0000 +Subject: [PATCH] add VSX dispatch for fft_fill_with_conjugate_symmetry_stub + +--- + aten/src/ATen/native/mkl/SpectralOps.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp +index 0625110a04ff..2bd05ca19cac 100644 +--- a/aten/src/ATen/native/mkl/SpectralOps.cpp ++++ b/aten/src/ATen/native/mkl/SpectralOps.cpp +@@ -151,6 +151,7 @@ static void _fft_fill_with_conjugate_symmetry_cpu_( + REGISTER_ARCH_DISPATCH(fft_fill_with_conjugate_symmetry_stub, DEFAULT, &_fft_fill_with_conjugate_symmetry_cpu_) + REGISTER_AVX2_DISPATCH(fft_fill_with_conjugate_symmetry_stub, &_fft_fill_with_conjugate_symmetry_cpu_) + REGISTER_AVX512_DISPATCH(fft_fill_with_conjugate_symmetry_stub, &_fft_fill_with_conjugate_symmetry_cpu_) ++REGISTER_VSX_DISPATCH(fft_fill_with_conjugate_symmetry_stub, &_fft_fill_with_conjugate_symmetry_cpu_) + + // _out variants can be shared between PocketFFT and MKL + Tensor& _fft_r2c_mkl_out(const Tensor& self, IntArrayRef dim, int64_t normalization, diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_remove-vec-dump-functions.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_remove-vec-dump-functions.patch new file mode 100644 index 000000000000..b6e2100a1789 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0_remove-vec-dump-functions.patch @@ -0,0 +1,114 @@ +Fix compilation on PPC due to missing include + +From https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/66085 + +From e0fa15d188986e2a68973e3facf9d794f6cfc5df Mon Sep 17 00:00:00 2001 +From: Nikita Shulga +Date: Mon, 4 Oct 2021 09:26:37 -0700 +Subject: [PATCH] Remove unused `dump` method from VSX vec256 methods + +Follow up after https://github.com/pytorch/pytorch/pull/63533 + +Probably fixes https://github.com/pytorch/pytorch/issues/65956 +--- + .../ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h | 5 ----- + .../ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h | 8 -------- + aten/src/ATen/cpu/vec/vec256/vsx/vec256_double_vsx.h | 3 --- + aten/src/ATen/cpu/vec/vec256/vsx/vec256_float_vsx.h | 7 ------- + aten/src/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h | 9 --------- + 5 files changed, 32 deletions(-) + +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h +index 888f2f0b932b..3a3e0daade09 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h +@@ -356,11 +356,6 @@ class Vectorized { + return {vec_sqrt(_vec0), vec_sqrt(_vec1)}; + } + +- void dump() const { +- std::cout << _vec0[0] << "," << _vec0[1] << ","; +- std::cout << _vec1[0] << "," << _vec1[1] << std::endl; +- } +- + Vectorized sqrt() const { + return map(std::sqrt); + } +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h +index 0aa726b9bfdd..712de24597dc 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h +@@ -144,7 +144,6 @@ class Vectorized { + // convert std::complex index mask to V index mask: xy -> xxyy + auto mask_complex = Vectorized( + vec_mergeh(mask._vec0, mask._vec0), vec_mergeh(mask._vec1, mask._vec1)); +- // mask_complex.dump(); + return { + vec_sel(a._vec0, b._vec0, mask_complex._vec0), + vec_sel(a._vec1, b._vec1, mask_complex._vec1), +@@ -409,13 +408,6 @@ class Vectorized { + return {vec_sqrt(_vec0), vec_sqrt(_vec1)}; + } + +- void dump() const { +- std::cout << _vec0[0] << "," << _vec0[1] << "," << _vec0[2] << "," +- << _vec0[3] << ","; +- std::cout << _vec1[0] << "," << _vec1[1] << "," << _vec1[2] << "," +- << _vec1[3] << std::endl; +- } +- + Vectorized sqrt() const { + return map(std::sqrt); + } +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_double_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_double_vsx.h +index 29616182fe12..0d5b5cb39384 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_double_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_double_vsx.h +@@ -188,9 +188,6 @@ class Vectorized { + } + const double& operator[](int idx) const = delete; + double& operator[](int idx) = delete; +- void dump() const { +- std::cout << _vec0[0] << "," << _vec0[1] << "," << _vec1[0] << "," << _vec1[1] << std::endl; +- } + Vectorized map(double (*const f)(double)) const { + Vectorized ret; + for (int i = 0; i < size()/2; i++) { +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_float_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_float_vsx.h +index 2427276bcea2..58d4b057b507 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_float_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_float_vsx.h +@@ -436,13 +436,6 @@ class Vectorized { + return {vec_neg(_vec0), vec_neg(_vec1)}; + } + +- void dump() const { +- std::cout << _vec0[0] << "," << _vec0[1] << "," << _vec0[2] << "," +- << _vec0[3] << ","; +- std::cout << _vec1[0] << "," << _vec1[1] << "," << _vec1[2] << "," +- << _vec1[3] << std::endl; +- } +- + Vectorized C10_ALWAYS_INLINE round() const { + return {vec_round(_vec0), vec_round(_vec1)}; + } +diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h +index c335ace0ced6..2482af6ec232 100644 +--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h ++++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h +@@ -369,15 +369,6 @@ struct Vectorized { + return {vec0, vec1}; + } + +- void dump() const { +- value_type vals[size()]; +- store((void*)vals); +- for (int i = 0; i < size(); ++i) { +- std::cout << (int)(vals[i]) << " "; +- } +- std::cout << std::endl; +- } +- + DEFINE_MEMBER_OP(operator==, c10::quint8, vec_cmpeq) + DEFINE_MEMBER_OP(operator!=, c10::quint8, vec_cmpne) + DEFINE_MEMBER_OP(operator<, c10::quint8, vec_cmplt)