From f0eb34f5f4a8dbc67952f6da2a72500d6dfb86d2 Mon Sep 17 00:00:00 2001 From: Lou Knauer Date: Wed, 6 Sep 2023 11:48:42 +0200 Subject: [PATCH 1/2] [SingleSource/Vectorizer] Add unit tests for the vplan-native path. This patch adds unit tests for LLVM's VPlan-native path, as requested in https://reviews.llvm.org/D157484. The new Vectorizer/VPlanNativePath subdirectory is only enabled if the compiler is clang. For all source files in that directory, the flags "-mllvm -enable-vplan-native-path" are added. Four different scenarios are tested for outer-loop vectorization: - Matrix multiplication, where the second of three loops is vectorized. - A test where the vectorized loop has an auxiliary induction variable. - A test for indirect and strided memory accesses. - A nesting of three loops where the outer-most one is vectorized. --- .../UnitTests/Vectorizer/CMakeLists.txt | 5 + .../Vectorizer/VPlanNativePath/CMakeLists.txt | 6 + .../VPlanNativePath/outer-loop-vect.cpp | 139 ++++++++++++++++++ .../outer-loop-vect.reference_output | 5 + 4 files changed, 155 insertions(+) create mode 100644 SingleSource/UnitTests/Vectorizer/VPlanNativePath/CMakeLists.txt create mode 100644 SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.cpp create mode 100644 SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.reference_output diff --git a/SingleSource/UnitTests/Vectorizer/CMakeLists.txt b/SingleSource/UnitTests/Vectorizer/CMakeLists.txt index 03a2ca3c8c..5917c31bb6 100644 --- a/SingleSource/UnitTests/Vectorizer/CMakeLists.txt +++ b/SingleSource/UnitTests/Vectorizer/CMakeLists.txt @@ -1,2 +1,7 @@ llvm_singlesource() set_property(TARGET runtime-checks PROPERTY CXX_STANDARD 17) + +# The VPlan-native path is specific to llvm. 
+if ("${CMAKE_C_COMPILER_ID}" MATCHES "Clang") + add_subdirectory(VPlanNativePath) +endif() diff --git a/SingleSource/UnitTests/Vectorizer/VPlanNativePath/CMakeLists.txt b/SingleSource/UnitTests/Vectorizer/VPlanNativePath/CMakeLists.txt new file mode 100644 index 0000000000..d9ec6dbbad --- /dev/null +++ b/SingleSource/UnitTests/Vectorizer/VPlanNativePath/CMakeLists.txt @@ -0,0 +1,6 @@ +# Enable the VPlan-native path for outer-loop vectorization. Disable +# vectorization in general (`#pragma clang loop vectorize(enable)` overwrites +# this) because the two code paths do not mix well. +list(APPEND CXXFLAGS "-mllvm" "-enable-vplan-native-path" "-fno-vectorize") + +llvm_singlesource() diff --git a/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.cpp b/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.cpp new file mode 100644 index 0000000000..3aab08e40c --- /dev/null +++ b/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.cpp @@ -0,0 +1,139 @@ +#include +#include +#include +#include + +#include "../common.h" + +// Tests for outer-loop vectorization in LLVM's VPlan-native path. + +#define DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV(Args, Loop) \ + auto ScalarFn = [] Args { \ + _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \ + }; \ + auto VectorFn = [] Args { \ + _Pragma("clang loop vectorize(enable)") Loop \ + }; + +#define DEFINE_SCALAR_AND_VECTOR_FN_FOR_NESTED_OLV(Args, Loop) \ + auto ScalarFn = [] Args { \ + for (size_t i = 0; i < N; i++) { \ + _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \ + } \ + }; \ + auto VectorFn = [] Args { \ + for (size_t i = 0; i < N; i++) { \ + _Pragma("clang loop vectorize(enable)") Loop \ + } \ + }; + +int main() { + rng = std::mt19937(15); + + { + // A matrix-multiplication where the second loop of the triple loop nest + // is vectorized (the macro adds the outer-most loop). 
+ DEFINE_SCALAR_AND_VECTOR_FN_FOR_NESTED_OLV( + (size_t N, size_t M, size_t L, + int32_t *__restrict__ A, const int32_t *B, const int32_t *C), + for (size_t j = 0; j < L; j++) { + int32_t a = 0; + for (size_t k = 0; k < M; k++) + a += B[i*M+k] * C[k*L+j]; + A[i*L+j] = a; + }); + + std::cout << "Checking matrix-multiplication\n"; + + size_t N = 100, M = 100, L = 100; + std::unique_ptr A_Reference(new int32_t[N*L]); + std::unique_ptr A_ToCheck(new int32_t[N*L]); + std::unique_ptr B(new int32_t[N*M]); + std::unique_ptr C(new int32_t[M*L]); + init_data(B, N*M); + init_data(C, M*L); + + ScalarFn(N, M, L, &A_Reference[0], &B[0], &C[0]); + VectorFn(N, M, L, &A_ToCheck[0], &B[0], &C[0]); + check(A_Reference, A_ToCheck, N*L); + } + + { + // A test where the vectorized loop itself has an auxiliary IV. + DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV( + (size_t N, int32_t *__restrict__ A, const int32_t *B), + for (size_t i = 0, aux_iv = 333; i < N; i++, aux_iv += 12) { + int32_t b = B[i]; + for (size_t j = 0; j < N; j++) { + b += aux_iv * B[j]; + } + A[i] = aux_iv + b; + }); + + std::cout << "Checking loop with auxiliary IV\n"; + + size_t N = 123; + std::unique_ptr A_Reference(new int32_t[N]); + std::unique_ptr A_ToCheck(new int32_t[N]); + std::unique_ptr B(new int32_t[N]); + init_data(B, N); + + ScalarFn(N, &A_Reference[0], &B[0]); + VectorFn(N, &A_ToCheck[0], &B[0]); + check(A_Reference, A_ToCheck, N); + } + + { + // A test for irregular memory access patterns. 
+ DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV( + (size_t N, size_t M, + int32_t *__restrict__ A, const int32_t *B, const int32_t *C), + for (size_t i = 0; i < N; i++) { + int32_t a = 0; + for (size_t j = 0; j < M / 2; j++) { + int32_t idx = C[j*2]; + a += B[idx % N]; + } + A[i] = a; + }) + + std::cout << "Checking loop with indirect memory accesses\n"; + + size_t N = 123, M = 456; + std::unique_ptr A_Reference(new int32_t[N]); + std::unique_ptr A_ToCheck(new int32_t[N]); + std::unique_ptr B(new int32_t[N]); + std::unique_ptr C(new int32_t[M]); + init_data(B, N); + init_data(C, M); + + ScalarFn(N, M, &A_Reference[0], &B[0], &C[0]); + VectorFn(N, M, &A_ToCheck[0], &B[0], &C[0]); + check(A_Reference, A_ToCheck, N); + } + + { + // A test where the vectorized loop contains a loop which contains + // another loop itself. + DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV( + (size_t N, size_t M, size_t L, int32_t *__restrict__ A), + for (size_t i = 0; i < N; i++) { + for (size_t j = 0; j < M; j++) { + for (size_t k = 0; k < L; k++) { + A[i*M*L+j*L+k] = i * j * k; + } + } + }); + + std::cout << "Checking triple-loop-nest\n"; + + size_t N = 123, M = 45, L = 67; + std::unique_ptr A_Reference(new int32_t[N*M*L]); + std::unique_ptr A_ToCheck(new int32_t[N*M*L]); + + ScalarFn(N, M, L, &A_Reference[0]); + VectorFn(N, M, L, &A_ToCheck[0]); + check(A_Reference, A_ToCheck, N*M*L); + } + +} diff --git a/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.reference_output b/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.reference_output new file mode 100644 index 0000000000..60b78f2223 --- /dev/null +++ b/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.reference_output @@ -0,0 +1,5 @@ +Checking matrix-multiplication +Checking loop with auxiliary IV +Checking loop with indirect memory accesses +Checking triple-loop-nest +exit 0 From 1382bbc0caadce460cd1740dc12c8376d055bb32 Mon Sep 17 00:00:00 2001 From: Lou Knauer Date: Tue, 21 Nov 2023 19:01:35 +0100 
Subject: [PATCH 2/2] Fix coding-style --- .../VPlanNativePath/outer-loop-vect.cpp | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.cpp b/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.cpp index 3aab08e40c..ca7ae54eda 100644 --- a/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.cpp +++ b/SingleSource/UnitTests/Vectorizer/VPlanNativePath/outer-loop-vect.cpp @@ -17,12 +17,12 @@ #define DEFINE_SCALAR_AND_VECTOR_FN_FOR_NESTED_OLV(Args, Loop) \ auto ScalarFn = [] Args { \ - for (size_t i = 0; i < N; i++) { \ + for (size_t I = 0; I < N; I++) { \ _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \ } \ }; \ auto VectorFn = [] Args { \ - for (size_t i = 0; i < N; i++) { \ + for (size_t I = 0; I < N; I++) { \ _Pragma("clang loop vectorize(enable)") Loop \ } \ }; @@ -36,22 +36,22 @@ int main() { DEFINE_SCALAR_AND_VECTOR_FN_FOR_NESTED_OLV( (size_t N, size_t M, size_t L, int32_t *__restrict__ A, const int32_t *B, const int32_t *C), - for (size_t j = 0; j < L; j++) { - int32_t a = 0; - for (size_t k = 0; k < M; k++) - a += B[i*M+k] * C[k*L+j]; - A[i*L+j] = a; + for (size_t J = 0; J < L; J++) { + int32_t X = 0; + for (size_t K = 0; K < M; K++) + X += B[I * M + K] * C[K * L + J]; + A[I * L + J] = X; }); std::cout << "Checking matrix-multiplication\n"; size_t N = 100, M = 100, L = 100; - std::unique_ptr A_Reference(new int32_t[N*L]); - std::unique_ptr A_ToCheck(new int32_t[N*L]); - std::unique_ptr B(new int32_t[N*M]); - std::unique_ptr C(new int32_t[M*L]); - init_data(B, N*M); - init_data(C, M*L); + std::unique_ptr A_Reference(new int32_t[N * L]); + std::unique_ptr A_ToCheck(new int32_t[N * L]); + std::unique_ptr B(new int32_t[N * M]); + std::unique_ptr C(new int32_t[M * L]); + init_data(B, N * M); + init_data(C, M * L); ScalarFn(N, M, L, &A_Reference[0], &B[0], &C[0]); VectorFn(N, M, L, &A_ToCheck[0], &B[0], &C[0]); @@ -62,12 
+62,12 @@ int main() { // A test where the vectorized loop itself has an auxiliary IV. DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV( (size_t N, int32_t *__restrict__ A, const int32_t *B), - for (size_t i = 0, aux_iv = 333; i < N; i++, aux_iv += 12) { - int32_t b = B[i]; - for (size_t j = 0; j < N; j++) { - b += aux_iv * B[j]; + for (size_t I = 0, AuxIV = 333; I < N; I++, AuxIV += 12) { + int32_t X = B[I]; + for (size_t J = 0; J < N; J++) { + X += AuxIV * B[J]; } - A[i] = aux_iv + b; + A[I] = AuxIV + X; }); std::cout << "Checking loop with auxiliary IV\n"; @@ -88,13 +88,13 @@ int main() { DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV( (size_t N, size_t M, int32_t *__restrict__ A, const int32_t *B, const int32_t *C), - for (size_t i = 0; i < N; i++) { - int32_t a = 0; - for (size_t j = 0; j < M / 2; j++) { - int32_t idx = C[j*2]; - a += B[idx % N]; + for (size_t I = 0; I < N; I++) { + int32_t X = 0; + for (size_t J = 0; J < M / 2; J++) { + int32_t Idx = C[J * 2]; + X += B[Idx % N]; } - A[i] = a; + A[I] = X; }) std::cout << "Checking loop with indirect memory accesses\n"; @@ -117,10 +117,10 @@ int main() { // another loop itself. DEFINE_SCALAR_AND_VECTOR_FN_FOR_OLV( (size_t N, size_t M, size_t L, int32_t *__restrict__ A), - for (size_t i = 0; i < N; i++) { - for (size_t j = 0; j < M; j++) { - for (size_t k = 0; k < L; k++) { - A[i*M*L+j*L+k] = i * j * k; + for (size_t I = 0; I < N; I++) { + for (size_t J = 0; J < M; J++) { + for (size_t K = 0; K < L; K++) { + A[I * (M * L) + J * L + K] = I * J * K; } } }); @@ -128,12 +128,12 @@ int main() { std::cout << "Checking triple-loop-nest\n"; size_t N = 123, M = 45, L = 67; - std::unique_ptr A_Reference(new int32_t[N*M*L]); - std::unique_ptr A_ToCheck(new int32_t[N*M*L]); + std::unique_ptr A_Reference(new int32_t[N * M * L]); + std::unique_ptr A_ToCheck(new int32_t[N * M * L]); ScalarFn(N, M, L, &A_Reference[0]); VectorFn(N, M, L, &A_ToCheck[0]); - check(A_Reference, A_ToCheck, N*M*L); + check(A_Reference, A_ToCheck, N * M * L); } }