We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 06d9294, commit 6fe76f2 (Copy full SHA for 6fe76f2)
benchmarks/driver.jl
@@ -129,7 +129,7 @@ function sse!(Xβ, y, X, β)
129
dot(Xβ, Xβ)
130
end
131
function benchmark_sse(sizes)
132
- tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "GFort-intrinsic", "LoopVectorization"]
+ tests = [BLAS.vendor() === :mkl ? "IntelMKL" : "OpenBLAS", "Julia", "Clang-Polly", "GFort-loops", "LoopVectorization"]
133
br = BenchmarkResult(tests, sizes)
134
for (i,s) ∈ enumerate(sizes)
135
N, P = totwotuple(s)
@@ -156,7 +156,7 @@ function benchmark_exp(sizes)
156
157
158
a = rand(s); b = similar(a)
159
- n_gflop = s # not really gflops
+ n_gflop = 1e-9*s # not really gflops
160
br[1,i] = n_gflop / @belapsed @. $b = exp($a)
161
baseb = copy(b)
162
br[2,i] = n_gflop / @belapsed fvexp!($b, $a)
benchmarks/loadsharedlibs.jl
@@ -113,16 +113,16 @@ function caplusBc!(D, a, B, c)
113
M, K = size(B)
114
ccall(
115
(:aplusBc, LIBCTEST), Cvoid,
116
- (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Clong, Clong),
117
- y, A, x, M, K
+ (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Clong, Clong),
+ D, a, B, c, M, K
118
)
119
120
function faplusBc!(D, a, B, c)
121
122
123
(:aplusBc, LIBFTEST), Cvoid,
124
- (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{Clong}, Ref{Clong}),
125
- y, A, x, Ref(M), Ref(K)
+ (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{Clong}, Ref{Clong}),
+ D, a, B, c, Ref(M), Ref(K)
126
127
128
function cOLSlp(y, X, β)
benchmarks/looptests.c
@@ -118,7 +118,7 @@ void unscaledvar(double* restrict s, double* restrict A, double* restrict xb, lo
return;
}
-void aplucBc(double* restrict D, double* restrict a, double* restrict B, double* restrict c, long M, long N){
+void aplusBc(double* restrict D, double* restrict a, double* restrict B, double* restrict c, long M, long N){
for (long n = 0; n < N; n++){
for (long m = 0; m < M; m++){
D[m + n*M] = a[m] + B[m + n*M] * c[n];
benchmarks/looptests.f90
@@ -176,7 +176,7 @@ subroutine unscaledvar(s, A, x, M, N) BIND(C, name="unscaledvar")
176
end do
177
178
end subroutine unscaledvar
179
- subroutine aplusBc(D, a, B, c, M, N) BIND(C, name="aplucBc")
+ subroutine aplusBc(D, a, B, c, M, N) BIND(C, name="aplusBc")
180
integer(C_long), intent(in) :: M, N
181
real(C_double), intent(in) :: a(M), B(M,N), c(N)
182
real(C_double), dimension(M,N), intent(out) :: D
benchmarks/looptests.jl
@@ -113,9 +113,9 @@ japlucBcavx!(d, a, B, c) = @avx @. d = a + B * c';
function jOLSlp(y, X, β)
lp = 0.0
- @inbounds for i ∈ eachindex(y)
+ @inbounds @fastmath for i ∈ eachindex(y)
δ = y[i]
- @simd for j ∈ eachindex(x)
+ @simd for j ∈ eachindex(β)
δ -= X[i,j] * β[j]
lp += δ * δ
0 commit comments