From bb74b1a0e643bea51e31b103ab6cf7a429727524 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Tue, 17 Oct 2023 15:31:57 -0500 Subject: [PATCH] Add ARM build configs for latest gcc/clang. (#468) Co-authored-by: Michael Schellenberger Costa --- .github/workflows/dispatch-build-and-test.yml | 4 +-- ci/matrix.yaml | 2 ++ thrust/testing/functional.cu | 19 +++++++++++-- thrust/testing/transform.cu | 27 +++++++++++++++++++ 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml index 96557aa8437..f421910f244 100644 --- a/.github/workflows/dispatch-build-and-test.yml +++ b/.github/workflows/dispatch-build-and-test.yml @@ -22,7 +22,7 @@ jobs: include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} with: cpu: ${{ matrix.cpu }} - test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} + test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} build_script: "./ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}}" test_script: "./ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}}" container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} @@ -37,6 +37,6 @@ jobs: matrix: include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} with: - test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} + test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 ${{matrix.std}}" container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}} diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 
4c931381e02..9c362559d4c 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -35,6 +35,7 @@ pull_request: - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'gcc', version: '10', exe: 'g++'}, std: [11, 14, 17, 20], jobs: ['build']} - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '11', exe: 'g++'}, std: [11, 14, 17, 20], jobs: ['build']} - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'gcc', version: '12', exe: 'g++'}, std: [11, 14, 17, 20], jobs: ['build', 'test']} + - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'arm64', compiler: {name: 'gcc', version: '12', exe: 'g++'}, std: [11, 14, 17, 20], jobs: ['build']} - {cuda: *cuda_oldest, os: 'ubuntu18.04', cpu: 'amd64', compiler: {name: 'llvm', version: '9', exe: 'clang++'}, std: [11, 14, 17], jobs: ['build']} - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '9', exe: 'clang++'}, std: [11, 14, 17], jobs: ['build']} - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '10', exe: 'clang++'}, std: [11, 14, 17], jobs: ['build']} @@ -44,6 +45,7 @@ pull_request: - {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: {name: 'llvm', version: '14', exe: 'clang++'}, std: [11, 14, 17, 20], jobs: ['build']} - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'llvm', version: '15', exe: 'clang++'}, std: [11, 14, 17, 20], jobs: ['build']} - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: {name: 'llvm', version: '16', exe: 'clang++'}, std: [11, 14, 17, 20], jobs: ['build', 'test']} + - {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'arm64', compiler: {name: 'llvm', version: '16', exe: 'clang++'}, std: [11, 14, 17, 20], jobs: ['build']} - {cuda: *cuda_oldest, os: 'windows2022', cpu: 'amd64', compiler: {name: 'cl', version: '14.16', exe: 'cl++'}, std: [14, 17], jobs: ['build']} - {cuda: *cuda_newest, os: 'windows2022', cpu: 
'amd64', compiler: {name: 'cl', version: '14.29', exe: 'cl++'}, std: [14, 17], jobs: ['build']} - {cuda: *cuda_newest, os: 'windows2022', cpu: 'amd64', compiler: {name: 'cl', version: '14.36', exe: 'cl++'}, std: [14, 17, 20], jobs: ['build']} diff --git a/thrust/testing/functional.cu b/thrust/testing/functional.cu index f5ca10dc834..11b5a5fc58d 100644 --- a/thrust/testing/functional.cu +++ b/thrust/testing/functional.cu @@ -7,9 +7,18 @@ THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN +// There is an unfortunate miscompilation in the gcc-12 vectorizer that leads to out-of-bounds (OOB) writes. +// Adding this attribute is sufficient to prevent the miscompilation. +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && __GNUC__ >= 12 && THRUST_CPP_DIALECT >= 2020 +#define THRUST_DISABLE_BROKEN_GCC_VECTORIZER __attribute__((optimize("no-tree-vectorize"))) +#else +#define THRUST_DISABLE_BROKEN_GCC_VECTORIZER +#endif + const size_t NUM_SAMPLES = 10000; template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestUnaryFunctional(void) { typedef typename InputVector::value_type InputType; @@ -28,6 +37,7 @@ void TestUnaryFunctional(void) } template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestBinaryFunctional(void) { typedef typename InputVector::value_type InputType; @@ -51,8 +61,6 @@ void TestBinaryFunctional(void) ASSERT_ALMOST_EQUAL(output, std_output); } - - // XXX add bool to list // Instantiate a macro for all integer-like data types #define INSTANTIATE_INTEGER_TYPES(Macro, vector_type, operator_name) \ @@ -175,6 +183,7 @@ DECLARE_UNARY_LOGICAL_FUNCTIONAL_UNITTEST(logical_not, LogicalNot); // Ad-hoc testing for other functionals template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestIdentityFunctional(void) { typedef typename Vector::value_type T; @@ -191,6 +200,7 @@ void TestIdentityFunctional(void) DECLARE_VECTOR_UNITTEST(TestIdentityFunctional); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestProject1stFunctional(void) { typedef typename 
Vector::value_type T; @@ -211,6 +221,7 @@ void TestProject1stFunctional(void) DECLARE_VECTOR_UNITTEST(TestProject1stFunctional); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestProject2ndFunctional(void) { typedef typename Vector::value_type T; @@ -231,6 +242,7 @@ void TestProject2ndFunctional(void) DECLARE_VECTOR_UNITTEST(TestProject2ndFunctional); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestMaximumFunctional(void) { typedef typename Vector::value_type T; @@ -255,6 +267,7 @@ void TestMaximumFunctional(void) DECLARE_VECTOR_UNITTEST(TestMaximumFunctional); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestMinimumFunctional(void) { typedef typename Vector::value_type T; @@ -279,6 +292,7 @@ void TestMinimumFunctional(void) DECLARE_VECTOR_UNITTEST(TestMinimumFunctional); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestNot1(void) { typedef typename Vector::value_type T; @@ -314,6 +328,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestNot1); THRUST_CPP_DIALECT == 2011) template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestNot2(void) { typedef typename Vector::value_type T; diff --git a/thrust/testing/transform.cu b/thrust/testing/transform.cu index 87d60f9c978..0c9ce9efadf 100644 --- a/thrust/testing/transform.cu +++ b/thrust/testing/transform.cu @@ -7,8 +7,16 @@ #include #include +// There is an unfortunate miscompilation in the gcc-12 vectorizer that leads to out-of-bounds (OOB) writes. +// Adding this attribute is sufficient to prevent the miscompilation. +#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && __GNUC__ >= 12 && THRUST_CPP_DIALECT >= 2020 +#define THRUST_DISABLE_BROKEN_GCC_VECTORIZER __attribute__((optimize("no-tree-vectorize"))) +#else +#define THRUST_DISABLE_BROKEN_GCC_VECTORIZER +#endif template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformUnarySimple(void) { typedef typename Vector::value_type T; @@ -78,6 +86,7 @@ DECLARE_UNITTEST(TestTransformUnaryDispatchImplicit); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER 
void TestTransformIfUnaryNoStencilSimple(void) { typedef typename Vector::value_type T; @@ -166,6 +175,7 @@ DECLARE_UNITTEST(TestTransformIfUnaryNoStencilDispatchImplicit); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformIfUnarySimple(void) { typedef typename Vector::value_type T; @@ -259,6 +269,7 @@ DECLARE_UNITTEST(TestTransformIfUnaryDispatchImplicit); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformBinarySimple(void) { typedef typename Vector::value_type T; @@ -339,6 +350,7 @@ DECLARE_UNITTEST(TestTransformBinaryDispatchImplicit); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformIfBinarySimple(void) { typedef typename Vector::value_type T; @@ -447,6 +459,7 @@ DECLARE_UNITTEST(TestTransformIfBinaryDispatchImplicit); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformUnary(const size_t n) { thrust::host_vector h_input = unittest::random_integers(n); @@ -464,6 +477,7 @@ DECLARE_VARIABLE_UNITTEST(TestTransformUnary); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformUnaryToDiscardIterator(const size_t n) { thrust::host_vector h_input = unittest::random_integers(n); @@ -495,6 +509,7 @@ struct repeat2 template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformUnaryToDiscardIteratorZipped(const size_t n) { thrust::host_vector h_input = unittest::random_integers(n); @@ -542,6 +557,7 @@ struct is_positive template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformIfUnaryNoStencil(const size_t n) { thrust::host_vector h_input = unittest::random_integers(n); @@ -564,6 +580,7 @@ DECLARE_VARIABLE_UNITTEST(TestTransformIfUnaryNoStencil); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformIfUnary(const size_t n) { thrust::host_vector h_input = unittest::random_integers(n); @@ -590,6 +607,7 @@ DECLARE_VARIABLE_UNITTEST(TestTransformIfUnary); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformIfUnaryToDiscardIterator(const size_t n) { thrust::host_vector 
h_input = unittest::random_integers(n); @@ -619,6 +637,7 @@ DECLARE_VARIABLE_UNITTEST(TestTransformIfUnaryToDiscardIterator); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformBinary(const size_t n) { thrust::host_vector h_input1 = unittest::random_integers(n); @@ -643,6 +662,7 @@ DECLARE_VARIABLE_UNITTEST(TestTransformBinary); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformBinaryToDiscardIterator(const size_t n) { thrust::host_vector h_input1 = unittest::random_integers(n); @@ -664,6 +684,7 @@ DECLARE_VARIABLE_UNITTEST(TestTransformBinaryToDiscardIterator); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformIfBinary(const size_t n) { thrust::host_vector h_input1 = unittest::random_integers(n); @@ -711,6 +732,7 @@ DECLARE_VARIABLE_UNITTEST(TestTransformIfBinary); template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformIfBinaryToDiscardIterator(const size_t n) { thrust::host_vector h_input1 = unittest::random_integers(n); @@ -745,6 +767,7 @@ DECLARE_VARIABLE_UNITTEST(TestTransformIfBinaryToDiscardIterator); #if ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) == 40400) || defined(__INTEL_COMPILER) template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformUnaryCountingIterator() { // G++ 4.4.x has a known failure with auto-vectorization (due to -O3 or @@ -758,6 +781,7 @@ void TestTransformUnaryCountingIterator() } #else template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformUnaryCountingIterator() { size_t const n = 15 * sizeof(T); @@ -780,6 +804,7 @@ DECLARE_GENERIC_UNITTEST(TestTransformUnaryCountingIterator); #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) == 40400 template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformBinaryCountingIterator() { // GCC 4.4.x has a known failure with auto-vectorization (due to -O3 or -ftree-vectorize) of this test @@ -789,6 +814,7 @@ void TestTransformBinaryCountingIterator() } #else template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void 
TestTransformBinaryCountingIterator() { size_t const n = 15 * sizeof(T); @@ -825,6 +851,7 @@ struct plus_mod3 }; template +THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestTransformWithIndirection(void) { // add numbers modulo 3 with external lookup table