Skip to content

Commit

Permalink
Update compat and versions, add intel compiler to benchmarks.
Browse files Browse the repository at this point in the history
  • Loading branch information
chriselrod committed Jan 16, 2020
1 parent bb75698 commit 516216b
Show file tree
Hide file tree
Showing 5 changed files with 261 additions and 209 deletions.
16 changes: 8 additions & 8 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ version = "0.2.2"

[[DataStructures]]
deps = ["InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "f784254f428fb8fd7ac15982e5862a38a44523d3"
git-tree-sha1 = "b7720de347734f4716d1815b00ce5664ed6bbfd4"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.17.7"
version = "0.17.9"

[[Distributed]]
deps = ["Random", "Serialization", "Sockets"]
Expand Down Expand Up @@ -61,15 +61,15 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[[SIMDPirates]]
deps = ["MacroTools", "VectorizationBase"]
git-tree-sha1 = "c0f42ddb2645c54b8620979df5dc979c4742db59"
git-tree-sha1 = "910193d289b41e570118c4e444f0c05cc700a2f7"
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
version = "0.1.4"
version = "0.1.5"

[[SLEEFPirates]]
deps = ["SIMDPirates", "VectorizationBase"]
git-tree-sha1 = "547bcf7d30967d87d4c62b3fe5efdb0e57a6e436"
git-tree-sha1 = "4733445246d3d5536c7aee1bffb55ab37b88347b"
uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
version = "0.1.2"
version = "0.1.3"

[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
Expand All @@ -83,6 +83,6 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[VectorizationBase]]
deps = ["CpuId", "LinearAlgebra"]
git-tree-sha1 = "81c1b3171d93e64345d75a9f08d190a155e9f009"
git-tree-sha1 = "a2576763aa20968ffb5668e2e15d45ae8e364d05"
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
version = "0.1.7"
version = "0.1.9"
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
[compat]
MacroTools = "0.5"
Parameters = "0.12.0"
SIMDPirates = "0.1.4"
SLEEFPirates = "0.1.2"
VectorizationBase = "0.1.7"
SIMDPirates = "0.1.5"
SLEEFPirates = "0.1.3"
VectorizationBase = "0.1.9"
julia = "1.3.0"

[extras]
Expand Down
76 changes: 58 additions & 18 deletions benchmark/benchmarkflops.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ end
# Expand a size specification into a 3-tuple of `Int`s.
# A single `Int` is repeated in all three positions.
function tothreetuple(i::Int)
    return (i, i, i)
end
# A 3-tuple of `Int`s is already in the target form and is returned unchanged.
function tothreetuple(i::NTuple{3,Int})
    return i
end
function benchmark_gemm(sizes)
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "GFort-intrinsic", "LoopVectorization"]
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "GFort-intrinsic", "icc", "ifort", "ifort-intrinsic", "LoopVectorization"]
br = BenchmarkResult(tests, sizes)
for (i,s) ∈ enumerate(sizes)
M, K, N = tothreetuple(s)
Expand All @@ -57,7 +57,13 @@ function benchmark_gemm(sizes)
@assert C ≈ Cblas "Fort gemm wrong?"
br[5,i] = n_gflop / @belapsed fgemm_builtin!($C, $A, $B)
@assert C ≈ Cblas "Fort intrinsic gemm wrong?"
br[6,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
br[6,i] = n_gflop / @belapsed icgemm_nkm!($C, $A, $B)
@assert C ≈ Cblas "icc gemm wrong?"
br[7,i] = n_gflop / @belapsed ifgemm_nkm!($C, $A, $B)
@assert C ≈ Cblas "ifort gemm wrong?"
br[8,i] = n_gflop / @belapsed ifgemm_builtin!($C, $A, $B)
@assert C ≈ Cblas "ifort intrinsic gemm wrong?"
br[9,i] = n_gflop / @belapsed gemmavx!($C, $A, $B)
@assert C ≈ Cblas "LoopVec gemm wrong?"
# if i % 10 == 0
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
Expand All @@ -67,7 +73,7 @@ function benchmark_gemm(sizes)
br
end
function benchmark_AtmulB(sizes)
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "GFort-intrinsic", "LoopVectorization"]
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "GFort-intrinsic", "icc", "ifort", "ifort-intrinsic", "LoopVectorization"]
br = BenchmarkResult(tests, sizes)
for (i,s) ∈ enumerate(sizes)
M, K, N = tothreetuple(s)
Expand All @@ -85,7 +91,13 @@ function benchmark_AtmulB(sizes)
@assert C ≈ Cblas "Fort gemm wrong?"
br[5,i] = n_gflop / @belapsed fAtmulB_builtin!($C, $At, $B)
@assert C ≈ Cblas "Fort intrinsic gemm wrong?"
br[6,i] = n_gflop / @belapsed jAtmulBavx!($C, $At, $B)
br[6,i] = n_gflop / @belapsed cAtmulB!($C, $At, $B)
@assert C ≈ Cblas "icc gemm wrong?"
br[7,i] = n_gflop / @belapsed ifAtmulB!($C, $At, $B)
@assert C ≈ Cblas "iort gemm wrong?"
br[8,i] = n_gflop / @belapsed ifAtmulB_builtin!($C, $At, $B)
@assert C ≈ Cblas "ifort intrinsic gemm wrong?"
br[9,i] = n_gflop / @belapsed jAtmulBavx!($C, $At, $B)
@assert C ≈ Cblas "LoopVec gemm wrong?"
# if i % 10 == 0
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
Expand All @@ -96,7 +108,7 @@ function benchmark_AtmulB(sizes)
end

function benchmark_dot(sizes)
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
br = BenchmarkResult(tests, sizes)
for (i,s) ∈ enumerate(sizes)
a = rand(s); b = rand(s);
Expand All @@ -109,7 +121,11 @@ function benchmark_dot(sizes)
@assert cdot(a,b) ≈ dotblas "Polly dot wrong?"
br[4,i] = n_gflop / @belapsed fdot($a, $b)
@assert fdot(a,b) ≈ dotblas "Fort dot wrong?"
br[5,i] = n_gflop / @belapsed jdotavx($a, $b)
br[5,i] = n_gflop / @belapsed icdot($a, $b)
@assert cdot(a,b) ≈ dotblas "icc dot wrong?"
br[6,i] = n_gflop / @belapsed ifdot($a, $b)
@assert fdot(a,b) ≈ dotblas "ifort dot wrong?"
br[7,i] = n_gflop / @belapsed jdotavx($a, $b)
@assert jdotavx(a,b) ≈ dotblas "LoopVec dot wrong?"
# if i % 10 == 0
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
Expand All @@ -119,7 +135,7 @@ function benchmark_dot(sizes)
br
end
function benchmark_selfdot(sizes)
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
br = BenchmarkResult(tests, sizes)
for (i,s) ∈ enumerate(sizes)
a = rand(s);
Expand All @@ -132,7 +148,11 @@ function benchmark_selfdot(sizes)
@assert cselfdot(a) ≈ dotblas "Polly dot wrong?"
br[4,i] = n_gflop / @belapsed fselfdot($a)
@assert fselfdot(a) ≈ dotblas "Fort dot wrong?"
br[5,i] = n_gflop / @belapsed jselfdotavx($a)
br[5,i] = n_gflop / @belapsed icselfdot($a)
@assert cselfdot(a) ≈ dotblas "icc dot wrong?"
br[6,i] = n_gflop / @belapsed ifselfdot($a)
@assert fselfdot(a) ≈ dotblas "ifort dot wrong?"
br[7,i] = n_gflop / @belapsed jselfdotavx($a)
@assert jselfdotavx(a) ≈ dotblas "LoopVec dot wrong?"
# if i % 10 == 0
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
Expand All @@ -144,7 +164,7 @@ end
# Expand a size specification into a 2-tuple of `Int`s.
# A single `Int` is repeated in both positions.
function totwotuple(i::Int)
    return (i, i)
end
# A pair of `Int`s is already in the target form and is returned unchanged.
function totwotuple(i::Tuple{Int,Int})
    return i
end
function benchmark_gemv(sizes)
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
br = BenchmarkResult(tests, sizes)
for (i,s) ∈ enumerate(sizes)
M, N = totwotuple(s)
Expand All @@ -158,7 +178,11 @@ function benchmark_gemv(sizes)
@assert x ≈ xblas "Polly wrong?"
br[4,i] = n_gflop / @belapsed fgemv!($x, $A, $y)
@assert x ≈ xblas "Fort wrong?"
br[5,i] = n_gflop / @belapsed jgemvavx!($x, $A, $y)
br[5,i] = n_gflop / @belapsed icgemv!($x, $A, $y)
@assert x ≈ xblas "icc wrong?"
br[6,i] = n_gflop / @belapsed ifgemv!($x, $A, $y)
@assert x ≈ xblas "ifort wrong?"
br[7,i] = n_gflop / @belapsed jgemvavx!($x, $A, $y)
@assert x ≈ xblas "LoopVec wrong?"
# if i % 10 == 0
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
Expand All @@ -168,7 +192,7 @@ function benchmark_gemv(sizes)
br
end
function benchmark_dot3(sizes)
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
br = BenchmarkResult(tests, sizes)
for (i,s) ∈ enumerate(sizes)
M, N = totwotuple(s)
Expand All @@ -182,7 +206,11 @@ function benchmark_dot3(sizes)
@assert cdot3(x, A, y) ≈ dotblas "Polly dot wrong?"
br[4,i] = n_gflop / @belapsed fdot3($x, $A, $y)
@assert fdot3(x, A, y) ≈ dotblas "Fort dot wrong?"
br[5,i] = n_gflop / @belapsed jdot3avx($x, $A, $y)
br[5,i] = n_gflop / @belapsed icdot3($x, $A, $y)
@assert cdot3(x, A, y) ≈ dotblas "icc dot wrong?"
br[6,i] = n_gflop / @belapsed ifdot3($x, $A, $y)
@assert fdot3(x, A, y) ≈ dotblas "ifort dot wrong?"
br[7,i] = n_gflop / @belapsed jdot3avx($x, $A, $y)
@assert jdot3avx(x, A, y) ≈ dotblas "LoopVec dot wrong?"
# if i % 10 == 0
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
Expand All @@ -196,7 +224,7 @@ function sse!(Xβ, y, X, β)
dot(Xβ, Xβ)
end
function benchmark_sse(sizes)
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
br = BenchmarkResult(tests, sizes)
for (i,s) ∈ enumerate(sizes)
N, P = totwotuple(s)
Expand All @@ -212,7 +240,11 @@ function benchmark_sse(sizes)
@assert cOLSlp(y, X, β) ≈ lpblas "Polly wrong?"
br[4,i] = n_gflop / @belapsed fOLSlp($y, $X, $β)
@assert fOLSlp(y, X, β) ≈ lpblas "Fort wrong?"
br[5,i] = n_gflop / @belapsed jOLSlp_avx($y, $X, $β)
br[5,i] = n_gflop / @belapsed icOLSlp($y, $X, $β)
@assert cOLSlp(y, X, β) ≈ lpblas "icc wrong?"
br[6,i] = n_gflop / @belapsed ifOLSlp($y, $X, $β)
@assert fOLSlp(y, X, β) ≈ lpblas "ifort wrong?"
br[7,i] = n_gflop / @belapsed jOLSlp_avx($y, $X, $β)
@assert jOLSlp_avx(y, X, β) ≈ lpblas "LoopVec wrong?"
# if i % 10 == 0
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
Expand All @@ -223,7 +255,7 @@ function benchmark_sse(sizes)
end

function benchmark_exp(sizes)
tests = ["Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
tests = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
br = BenchmarkResult(tests, sizes)
for (i,s) ∈ enumerate(sizes)
a = rand(s); b = similar(a)
Expand All @@ -234,7 +266,11 @@ function benchmark_exp(sizes)
@assert b ≈ baseb "Clang wrong?"
br[3,i] = n_gflop / @belapsed fvexp!($b, $a)
@assert b ≈ baseb "Fort wrong?"
br[4,i] = n_gflop / @belapsed @avx @. $b = exp($a)
br[4,i] = n_gflop / @belapsed icvexp!($b, $a)
@assert b ≈ baseb "icc wrong?"
br[5,i] = n_gflop / @belapsed ifvexp!($b, $a)
@assert b ≈ baseb "ifort wrong?"
br[6,i] = n_gflop / @belapsed @avx @. $b = exp($a)
@assert b ≈ baseb "LoopVec wrong?"
# if i % 10 == 0
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
Expand All @@ -245,7 +281,7 @@ function benchmark_exp(sizes)
end

function benchmark_aplusBc(sizes)
tests = ["Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
tests = ["Julia", "Clang-Polly", "GFortran", "icc", "ifort", "LoopVectorization"]
br = BenchmarkResult(tests, sizes)
for (i,s) ∈ enumerate(sizes)
M, N = totwotuple(s)
Expand All @@ -258,7 +294,11 @@ function benchmark_aplusBc(sizes)
@assert D ≈ Dcopy "Polly wrong?"
br[3,i] = n_gflop / @belapsed faplusBc!($D, $a, $B, $c)
@assert D ≈ Dcopy "Fort wrong?"
br[4,i] = n_gflop / @belapsed @avx @. $D = $a + $B * $c′
br[4,i] = n_gflop / @belapsed icaplusBc!($D, $a, $B, $c)
@assert D ≈ Dcopy "icc wrong?"
br[5,i] = n_gflop / @belapsed ifaplusBc!($D, $a, $B, $c)
@assert D ≈ Dcopy "ifort wrong?"
br[6,i] = n_gflop / @belapsed @avx @. $D = $a + $B * $c′
@assert D ≈ Dcopy "LoopVec wrong?"
# if i % 10 == 0
# percent_complete = round(100i/ length(sizes), sigdigits = 4)
Expand Down
5 changes: 3 additions & 2 deletions benchmark/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# includet(joinpath(LOOPVECBENCHDIR, "driver.jl"))

# Return the absolute path of the parent of the directory that holds the file
# `Base.find_package` reports for the package named `pkg`.
function pkgdir(pkg::String)
    entryfile = Base.find_package(pkg)
    parentdir = joinpath(dirname(entryfile), "..")
    return abspath(parentdir)
end
const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmarks")
const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmark")
include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
include(joinpath(LOOPVECBENCHDIR, "plotbenchmarks.jl"))

Expand All @@ -14,8 +14,9 @@ addprocs(9);

@everywhere begin
pkgdir(pkg::String) = abspath(joinpath(dirname(Base.find_package(pkg)), ".."))
const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmarks")
const LOOPVECBENCHDIR = joinpath(pkgdir("LoopVectorization"), "benchmark")
include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 1
end

gemm_future = @spawnat 2 benchmark_gemm(2:256);
Expand Down
Loading

2 comments on commit 516216b

@chriselrod
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/8032

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if Julia TagBot is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.6 -m "<description of version>" 516216bacd0547f0b9f90ae51d37eb98b3d17402
git push origin v0.3.6

Please sign in to comment.