Skip to content

Commit ed51f4e

Browse files
Merge pull request #97 from Shreyas-Ekanathan/master
Add Butterfly Factorization
2 parents 84e567d + 11f3224 commit ed51f4e

File tree

5 files changed

+232
-1
lines changed

5 files changed

+232
-1
lines changed

Project.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,20 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
88
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
99
Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
1010
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
11+
SparseBandedMatrices = "bd59d7e1-4699-4102-944e-d05209cb92aa"
1112
StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da"
1213
TriangularSolve = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf"
14+
VectorizedRNG = "33b4df10-0173-11e9-2a0c-851a7edac40e"
1315

1416
[compat]
1517
LinearAlgebra = "1.5"
1618
LoopVectorization = "0.10,0.11, 0.12"
1719
Polyester = "0.3.2,0.4.1, 0.5, 0.6, 0.7"
1820
PrecompileTools = "1"
21+
SparseBandedMatrices = "1"
1922
StrideArraysCore = "0.5.5"
2023
TriangularSolve = "0.2"
24+
VectorizedRNG = "0.2.25"
2125
julia = "1.10"
2226

2327
[extras]

perf/lu.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,4 @@ xaxis!(plt, "size (N x N)")
7878
yaxis!(plt, "GFLOPS")
7979
savefig("lubench.png")
8080
savefig("lubench.pdf")
81-
=#
81+
=#

src/RecursiveFactorization.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ if isdefined(Base, :Experimental) &&
44
@eval Base.Experimental.@max_methods 1
55
end
66
include("./lu.jl")
7+
include("./butterflylu.jl")
78

89
import PrecompileTools
910

src/butterflylu.jl

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
using VectorizedRNG
2+
using LinearAlgebra: Diagonal, I
3+
using LoopVectorization
4+
using RecursiveFactorization
5+
using SparseBandedMatrices
6+
7+
@inline exphalf(x) = exp(x) * oftype(x, 0.5)
8+
function 🦋!(wv, ::Val{SEED} = Val(888)) where {SEED}
9+
T = eltype(wv)
10+
mrng = VectorizedRNG.MutableXoshift(SEED)
11+
GC.@preserve mrng begin rand!(exphalf, VectorizedRNG.Xoshift(mrng), wv, static(0),
12+
T(-0.05), T(0.1)) end
13+
end
14+
15+
function 🦋generate_random!(A, ::Val{SEED} = Val(888)) where {SEED}
16+
Usz = 2 * size(A, 1)
17+
Vsz = 2 * size(A, 2)
18+
uv = similar(A, Usz + Vsz)
19+
🦋!(uv, Val(SEED))
20+
(uv,)
21+
end
22+
23+
function 🦋workspace(A, b, B::Matrix{T}, U::Adjoint{T, Matrix{T}}, V::Matrix{T}, thread, ::Val{SEED} = Val(888)) where {T, SEED}
24+
M = size(A, 1)
25+
if (M % 4 != 0)
26+
A = pad!(A)
27+
end
28+
B = similar(A)
29+
ws = 🦋generate_random!(copyto!(B, A))
30+
🦋mul!(copyto!(B, A), ws)
31+
U, V = materializeUV(B, ws)
32+
F = RecursiveFactorization.lu!(B, thread)
33+
out = similar(b, M)
34+
35+
U, V, F, out
36+
end
37+
38+
const butterfly_workspace = 🦋workspace;
39+
40+
function 🦋mul_level!(A, u, v)
41+
M, N = size(A)
42+
@assert M == length(u) && N == length(v)
43+
Mh = M >>> 1
44+
Nh = N >>> 1
45+
@turbo for n in 1 : Nh
46+
for m in 1 : Mh
47+
A11 = A[m, n]
48+
A21 = A[m + Mh, n]
49+
A12 = A[m, n + Nh]
50+
A22 = A[m + Mh, n + Nh]
51+
52+
T1 = A11 + A12
53+
T2 = A21 + A22
54+
T3 = A11 - A12
55+
T4 = A21 - A22
56+
C11 = T1 + T2
57+
C21 = T1 - T2
58+
C12 = T3 + T4
59+
C22 = T3 - T4
60+
61+
u1 = u[m]
62+
u2 = u[m + Mh]
63+
v1 = v[n]
64+
v2 = v[n + Nh]
65+
66+
A[m, n] = u1 * C11 * v1
67+
A[m + Mh, n] = u2 * C21 * v1
68+
A[m, n + Nh] = u1 * C12 * v2
69+
A[m + Mh, n + Nh] = u2 * C22 * v2
70+
end
71+
end
72+
end
73+
74+
function 🦋mul!(A, (uv,))
75+
M, N = size(A)
76+
@assert M == N
77+
Mh = M >>> 1
78+
79+
U₁ = @view(uv[1:Mh])
80+
V₁ = @view(uv[(Mh + 1):(M)])
81+
U₂ = @view(uv[(1 + M):(M + Mh)])
82+
V₂ = @view(uv[(1 + M + Mh):(2 * M)])
83+
84+
🦋mul_level!(@view(A[1:Mh, 1:Mh]), U₁, V₁)
85+
🦋mul_level!(@view(A[Mh + 1:M, 1:Mh]), U₂, V₁)
86+
🦋mul_level!(@view(A[1:Mh, Mh + 1:M]), U₁, V₂)
87+
🦋mul_level!(@view(A[Mh + 1:M, Mh + 1:M]), U₂, V₂)
88+
89+
U = @view(uv[(1 + 2 * M):(3 * M)])
90+
V = @view(uv[(1 + 3 * M):(4 * M)])
91+
92+
🦋mul_level!(@view(A[1:M, 1:N]), U, V)
93+
A
94+
end
95+
96+
function diagnegbottom(x)
97+
N = length(x)
98+
y = similar(x, N >>> 1)
99+
z = similar(x, N >>> 1)
100+
for n in 1:(N >>> 1)
101+
y[n] = x[n]
102+
end
103+
for n in 1:(N >>> 1)
104+
z[n] = x[n + (N >>> 1)]
105+
end
106+
Diagonal(y), Diagonal(z)
107+
end
108+
109+
function 🦋2!(C, A::Diagonal, B::Diagonal)
110+
@assert size(A) == size(B)
111+
A1 = size(A, 1)
112+
113+
for i in 1:A1
114+
C[i, i] = A[i, i]
115+
C[i + A1, i] = A[i, i]
116+
C[i, i + A1] = B[i, i]
117+
C[i + A1, i + A1] = -B[i, i]
118+
end
119+
120+
C
121+
end
122+
123+
function 🦋!(A::Matrix, C::SparseBandedMatrix, X::Diagonal, Y::Diagonal)
124+
@assert size(X) == size(Y)
125+
if (size(X, 1) + size(Y, 1) != size(A, 1))
126+
x = size(A, 1) - size(X, 1) - size(Y, 1)
127+
setdiagonal!(C, [X.diag; rand(x); -Y.diag], true)
128+
setdiagonal!(C, X.diag, true)
129+
setdiagonal!(C, Y.diag, false)
130+
else
131+
setdiagonal!(C, [X.diag; -Y.diag], true)
132+
setdiagonal!(C, X.diag, true)
133+
setdiagonal!(C, Y.diag, false)
134+
end
135+
136+
C
137+
end
138+
139+
function 🦋2!(C::SparseBandedMatrix, A::Diagonal, B::Diagonal)
140+
setdiagonal!(C, [A.diag; -B.diag], true)
141+
setdiagonal!(C, A.diag, true)
142+
setdiagonal!(C, B.diag, false)
143+
C
144+
end
145+
146+
function materializeUV(A, (uv,))
147+
M, N = size(A)
148+
Mh = M >>> 1
149+
Nh = N >>> 1
150+
151+
U₁u, U₁l = diagnegbottom(@view(uv[1:Mh])) #Mh
152+
U₂u, U₂l = diagnegbottom(@view(uv[(1 + Mh + Nh):(M + Nh)])) #M2
153+
V₁u, V₁l = diagnegbottom(@view(uv[(Mh + 1):(Mh + Nh)])) #Nh
154+
V₂u, V₂l = diagnegbottom(@view(uv[(1 + 2 * Mh + Nh):(2 * Mh + N)])) #N2
155+
Uu, Ul = diagnegbottom(@view(uv[(1 + M + N):(2 * M + N)])) #M
156+
Vu, Vl = diagnegbottom(@view(uv[(1 + 2 * M + N):(2 * M + 2 * N)])) #N
157+
158+
Bu2 = SparseBandedMatrix{typeof(uv[1])}(undef, M, N)
159+
160+
🦋2!(view(Bu2, 1 : Mh, 1 : Nh), U₁u, U₁l)
161+
🦋2!(view(Bu2, Mh + 1: M, Nh + 1: N), U₂u, U₂l)
162+
163+
Bu1 = SparseBandedMatrix{typeof(uv[1])}(undef, M, N)
164+
🦋!(A, Bu1, Uu, Ul)
165+
166+
Bv2 = SparseBandedMatrix{typeof(uv[1])}(undef, M, N)
167+
168+
🦋2!(view(Bv2, 1 : Mh, 1 : Nh), V₁u, V₁l)
169+
🦋2!(view(Bv2, Mh + 1: M, Nh + 1: N), V₂u, V₂l)
170+
171+
Bv1 = SparseBandedMatrix{typeof(uv[1])}(undef, M, N)
172+
🦋!(A, Bv1, Vu, Vl)
173+
174+
U = (Bu2 * Bu1)'
175+
V = Bv2 * Bv1
176+
177+
U, V
178+
end
179+
180+
function pad!(A)
181+
M, N = size(A)
182+
xn = 4 - M % 4
183+
A_new = similar(A, M + xn, N + xn)
184+
for j in 1 : N, i in 1 : M
185+
@inbounds A_new[i, j] = A[i, j]
186+
end
187+
188+
for j in M + 1 : M + xn, i in 1:M
189+
@inbounds A_new[i, j] = 0
190+
@inbounds A_new[j, i] = 0
191+
end
192+
193+
for j in N + 1 : N + xn, i in M + 1 : M + xn
194+
@inbounds A_new[i,j] = i == j
195+
end
196+
A_new
197+
end

test/runtests.jl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,32 @@ testlu(A::Union{Transpose, Adjoint}, MF, BF, p) = testlu(parent(A), parent(MF),
6464
end
6565
end
6666
end
67+
68+
function wilkinson(N)
69+
A = zeros(N, N)
70+
A[1:(N+1):N*N] .= 1
71+
A[:, end] .= 1
72+
for n in 1:(N - 1)
73+
for r in (n + 1):N
74+
@inbounds A[r, n] = -1
75+
end
76+
end
77+
A
78+
end
79+
80+
@testset "🦋" begin
81+
for i in 790 : 810
82+
A = wilkinson(i)
83+
b = rand(i)
84+
U, V, F, out = RecursiveFactorization.🦋workspace(A, b, A, A', A, Val(true))
85+
M = size(A, 1)
86+
xn = 4 - M % 4
87+
if (M % 4 != 0)
88+
xn = 4 - M % 4
89+
b = [b; rand(xn)]
90+
end
91+
sol = V * (F \ (U * b))
92+
out .= @view sol[1:M]
93+
@test norm(A * out .- b[1:M]) <= 1e-10
94+
end
95+
end

0 commit comments

Comments
 (0)