@@ -30,6 +30,29 @@ Base.setindex!(br::BenchmarkResult, v, i...) = br.sizedresults.results[i...] = v
# Display a `BenchmarkResult` by passing its stored results and its test labels
# to `pretty_table`, which writes to `io`.
Base.show(io::IO, br::BenchmarkResult) = pretty_table(io, br.sizedresults, br.tests)
33+
34+ using VegaLite, IndexedTables
"""
    plot(br::BenchmarkResult)

Draw the results stored in `br` as a VegaLite line chart with `Size` on the x
axis, `GFLOPS` on the y axis, and one colored line per `Method`.

The result matrix is flattened with `vec`, so the `Size` and `Method` columns are
built to line up with it: each size is repeated once per method, and the method
labels (`br.tests` without its first entry) are cycled once per size.
"""
function plot(br::BenchmarkResult)
    res = vec(br.sizedresults.results)
    brsizes = br.sizedresults.sizes
    # The first entry of `br.tests` is skipped; the remaining entries label the rows.
    labels = br.tests[2:end]
    # `repeat` replaces the manual fill loop and the generator splatted into `vcat`.
    # It also keeps a concrete element type when `brsizes` is empty, whereas
    # `vcat()` with no arguments returns `Any[]`.
    sizes = repeat(brsizes; inner = length(labels))
    tests = repeat(labels; outer = length(brsizes))
    t = table((GFLOPS = res, Size = sizes, Method = tests))
    return t |> @vlplot(
        :line,
        x = :Size,
        y = :GFLOPS,
        color = :Method
    )
end
55+
3356function alloc_matrices (s:: NTuple{3,Int} )
3457 M, K, N = s
3558 C = Matrix {Float64} (undef, M, N)
@@ -38,8 +61,8 @@ function alloc_matrices(s::NTuple{3,Int})
3861 C, A, B
3962end
# Square case: a single integer `s` is expanded to the size triple `(s, s, s)`
# and forwarded to the tuple method of `alloc_matrices`.
function alloc_matrices(s::Int)
    dims = (s, s, s)
    return alloc_matrices(dims)
end
41- gflop (s:: Int ) = s^ 3 * 1e -9
42- gflop (s:: NTuple{3,Int} ) = prod (s) * 1e -9
# Returns `2e-9` times the product of the three sizes. A single integer `s`
# is treated as the cube `s^3`.
function gflop(s::Int)
    return 2e-9 * s^3
end
function gflop(s::NTuple{3,Int})
    return 2e-9 * prod(s)
end
4366function benchmark_gemm (sizes)
4467 tests = [BLAS. vendor () === :mkl ? " IntelMKL" : " OpenBLAS" , " Julia" , " Clang-Polly" , " GFort-loops" , " GFort-intrinsic" , " LoopVectorization" ]
4568 br = BenchmarkResult (tests, sizes)
@@ -61,27 +84,108 @@ function benchmark_gemm(sizes)
6184 end
6285 br
6386end
"""
    benchmark_dot(sizes)

Time the dot-product implementations for every length in `sizes`.

For each length two random vectors are drawn and every implementation is timed
with `@belapsed`; `2e-9 * length` divided by the elapsed time is stored in the
returned `BenchmarkResult` (row = implementation, column = position in `sizes`).
The result of each implementation after the first is asserted to be `≈` the
value returned by `dot`.
"""
function benchmark_dot(sizes)
    blaslabel = BLAS.vendor() === :mkl ? " IntelMKL" : " OpenBLAS"
    tests = [blaslabel, " Julia", " Clang-Polly", " GFort-loops", " LoopVectorization"]
    br = BenchmarkResult(tests, sizes)
    for (col, len) in enumerate(sizes)
        a = rand(len)
        b = rand(len)
        n_gflop = 2e-9 * len

        # Row 1: `dot`, which also provides the reference value for the checks below.
        br[1, col] = n_gflop / @belapsed dot($a, $b)
        dotblas = dot(a, b)

        br[2, col] = n_gflop / @belapsed jdot($a, $b)
        @assert jdot(a, b) ≈ dotblas " Julia dot wrong?"

        br[3, col] = n_gflop / @belapsed cdot($a, $b)
        @assert cdot(a, b) ≈ dotblas " Polly dot wrong?"

        br[4, col] = n_gflop / @belapsed fdot($a, $b)
        @assert fdot(a, b) ≈ dotblas " Fort dot wrong?"

        br[5, col] = n_gflop / @belapsed jdotavx($a, $b)
        @assert jdotavx(a, b) ≈ dotblas " LoopVec dot wrong?"
    end
    return br
end
"""
    benchmark_selfdot(sizes)

Time the self-dot-product (a vector dotted with itself) implementations for
every length in `sizes`.

For each length one random vector is drawn and every implementation is timed
with `@belapsed`; `2e-9 * length` divided by the elapsed time is stored in the
returned `BenchmarkResult` (row = implementation, column = position in `sizes`).
The result of each implementation after the first is asserted to be `≈` the
value returned by `dot(a, a)`.
"""
function benchmark_selfdot(sizes)
    blaslabel = BLAS.vendor() === :mkl ? " IntelMKL" : " OpenBLAS"
    tests = [blaslabel, " Julia", " Clang-Polly", " GFort-loops", " LoopVectorization"]
    br = BenchmarkResult(tests, sizes)
    for (col, len) in enumerate(sizes)
        a = rand(len)
        n_gflop = 2e-9 * len

        # Row 1: `dot(a, a)`, which also provides the reference value for the checks below.
        br[1, col] = n_gflop / @belapsed dot($a, $a)
        dotblas = dot(a, a)

        br[2, col] = n_gflop / @belapsed jselfdot($a)
        @assert jselfdot(a) ≈ dotblas " Julia dot wrong?"

        br[3, col] = n_gflop / @belapsed cselfdot($a)
        @assert cselfdot(a) ≈ dotblas " Polly dot wrong?"

        br[4, col] = n_gflop / @belapsed fselfdot($a)
        @assert fselfdot(a) ≈ dotblas " Fort dot wrong?"

        br[5, col] = n_gflop / @belapsed jselfdotavx($a)
        @assert jselfdotavx(a) ≈ dotblas " LoopVec dot wrong?"
    end
    return br
end
# Normalize a size specification to a pair: an integer `i` becomes `(i, i)`,
# and a pair of integers is returned unchanged.
function totwotuple(i::Int)
    return (i, i)
end
function totwotuple(i::Tuple{Int,Int})
    return i
end
"""
    sse!(Xβ, y, X, β)

Overwrite `Xβ` with `X * β - y` and return `dot(Xβ, Xβ)`, the sum of the squared
entries of that difference.
"""
function sse!(Xβ, y, X, β)
    # Start from a copy of `y` so the 5-argument `mul!` can form
    # `1.0 * X * β + (-1.0) * Xβ` in place.
    copyto!(Xβ, y)
    mul!(Xβ, X, β, 1.0, -1.0)
    return dot(Xβ, Xβ)
end
"""
    benchmark_sse(sizes)

Time the sum-of-squared-errors implementations for every entry of `sizes`.

Each entry is an integer or a pair of integers and is turned into `(N, P)` by
`totwotuple`. For each entry a random `y` (length `N`), `β` (length `P`) and
`X` (`N × P`) are drawn, every implementation is timed with `@belapsed`, and
`2e-9 * (P*N + 2N)` divided by the elapsed time is stored in the returned
`BenchmarkResult` (row = implementation, column = position in `sizes`). The
result of each implementation after the first is asserted to be `≈` the value
returned by `sse!`.
"""
function benchmark_sse(sizes)
    # One label per benchmarked implementation. The list used to contain a sixth
    # label, " GFort-intrinsic", with no matching benchmark: the LoopVectorization
    # timing in row 5 was reported under that label and row 6 was never written.
    tests = [BLAS.vendor() === :mkl ? " IntelMKL" : " OpenBLAS", " Julia", " Clang-Polly", " GFort-loops", " LoopVectorization"]
    br = BenchmarkResult(tests, sizes)
    for (i, s) ∈ enumerate(sizes)
        N, P = totwotuple(s)
        y = rand(N); β = rand(P)
        X = randn(N, P)
        Xβ = similar(y)
        n_gflop = 2e-9 * (P*N + 2N)
        # Row 1: `sse!`, which also provides the reference value for the checks below.
        br[1,i] = n_gflop / @belapsed sse!($Xβ, $y, $X, $β)
        lpblas = sse!(Xβ, y, X, β)
        br[2,i] = n_gflop / @belapsed jOLSlp($y, $X, $β)
        @assert jOLSlp(y, X, β) ≈ lpblas " Julia wrong?"
        br[3,i] = n_gflop / @belapsed cOLSlp($y, $X, $β)
        @assert cOLSlp(y, X, β) ≈ lpblas " Polly wrong?"
        br[4,i] = n_gflop / @belapsed fOLSlp($y, $X, $β)
        @assert fOLSlp(y, X, β) ≈ lpblas " Fort wrong?"
        br[5,i] = n_gflop / @belapsed jOLSlp_avx($y, $X, $β)
        @assert jOLSlp_avx(y, X, β) ≈ lpblas " LoopVec wrong?"
    end
    return br
end
64153
65- using VegaLite, IndexedTables
66- function plot (br:: BenchmarkResult )
67- res = vec (br. sizedresults. results)
68- brsizes = br. sizedresults. sizes
69- sizes = Vector {eltype(brsizes)} (undef, length (res))
70- ntests = length (br. tests) - 1
71- for i ∈ 0 : length (brsizes)- 1
72- si = brsizes[i+ 1 ]
73- for j ∈ 1 : ntests
74- sizes[j + i* ntests] = si
75- end
"""
    benchmark_exp(sizes)

Time the elementwise `exp` implementations for every length in `sizes`.

For each length a random input `a` and an output buffer `b` are created and
every implementation is timed with `@belapsed`; the length divided by the
elapsed time is stored in the returned `BenchmarkResult` (row = implementation,
column = position in `sizes`). The output of the first implementation is kept
as the reference, and the output of each later one is asserted to be `≈` to it.
"""
function benchmark_exp(sizes)
    tests = [" Julia", " GFort-loops", " LoopVectorization"]
    br = BenchmarkResult(tests, sizes)
    for (i, s) ∈ enumerate(sizes)
        a = rand(s); b = similar(a)
        n_gflop = s # not really gflops
        br[1,i] = n_gflop / @belapsed @. $b = exp($a)
        baseb = copy(b)
        # Poison the shared output buffer before each later run. Otherwise `b`
        # still holds the reference values, and the check would pass even if
        # the kernel under test wrote nothing.
        fill!(b, NaN)
        br[2,i] = n_gflop / @belapsed fvexp!($b, $a)
        @assert b ≈ baseb " Fort wrong?"
        fill!(b, NaN)
        br[3,i] = n_gflop / @belapsed @avx @. $b = exp($a)
        @assert b ≈ baseb " LoopVec wrong?"
    end
    return br
end
169+
"""
    benchmark_aplusBc(sizes)

Time the implementations of the broadcast `D = a + B * c'` (elementwise, with
`a` a length-`M` vector, `B` an `M × N` matrix and `c` a length-`N` vector) for
every entry of `sizes`.

Each entry is an integer or a pair of integers and is turned into `(M, N)` by
`totwotuple`. Every implementation is timed with `@belapsed`, and `2e-9 * M * N`
divided by the elapsed time is stored in the returned `BenchmarkResult`
(row = implementation, column = position in `sizes`). The output of the first
implementation is kept as the reference, and the output of each later one is
asserted to be `≈` to it.
"""
function benchmark_aplusBc(sizes)
    tests = [" Julia", " Clang-Polly", " GFort-loops", " LoopVectorization"]
    br = BenchmarkResult(tests, sizes)
    for (i, s) ∈ enumerate(sizes)
        M, N = totwotuple(s)
        a = rand(M); B = rand(M, N); c = rand(N);
        c′ = c'; D = similar(B)
        n_gflop = 2e-9 * M * N
        br[1,i] = n_gflop / @belapsed @. $D = $a + $B * $c′
        Dcopy = copy(D)
        # Poison the shared output buffer before each later run. Otherwise `D`
        # still holds the reference values, and the check would pass even if
        # the kernel under test wrote nothing.
        fill!(D, NaN)
        br[2,i] = n_gflop / @belapsed caplusBc!($D, $a, $B, $c)
        @assert D ≈ Dcopy " Polly wrong?"
        fill!(D, NaN)
        br[3,i] = n_gflop / @belapsed faplusBc!($D, $a, $B, $c)
        @assert D ≈ Dcopy " Fort wrong?"
        fill!(D, NaN)
        br[4,i] = n_gflop / @belapsed @avx @. $D = $a + $B * $c′
        @assert D ≈ Dcopy " LoopVec wrong?"
    end
    return br
end
86189
87190
191+
0 commit comments