Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 28 additions & 31 deletions src/kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,20 @@ pub(crate) fn fft_chunk_n_simd(
let (imags_s0, imags_s1) = imags_chunk.split_at_mut(dist);

reals_s0
.chunks_exact_mut(8)
.zip(reals_s1.chunks_exact_mut(8))
.zip(imags_s0.chunks_exact_mut(8))
.zip(imags_s1.chunks_exact_mut(8))
.zip(twiddles_re.chunks_exact(8))
.zip(twiddles_im.chunks_exact(8))
.array_chunks_mut::<8>()
.zip(reals_s1.array_chunks_mut::<8>())
.zip(imags_s0.array_chunks_mut::<8>())
.zip(imags_s1.array_chunks_mut::<8>())
.zip(twiddles_re.array_chunks::<8>())
.zip(twiddles_im.array_chunks::<8>())
.for_each(|(((((re_s0, re_s1), im_s0), im_s1), w_re), w_im)| {
let real_c0 = f64x8::from_slice(re_s0);
let real_c1 = f64x8::from_slice(re_s1);
let imag_c0 = f64x8::from_slice(im_s0);
let imag_c1 = f64x8::from_slice(im_s1);
let real_c0 = f64x8::from_array(*re_s0);
let real_c1 = f64x8::from_array(*re_s1);
let imag_c0 = f64x8::from_array(*im_s0);
let imag_c1 = f64x8::from_array(*im_s1);

let tw_re = f64x8::from_slice(w_re);
let tw_im = f64x8::from_slice(w_im);
let tw_re = f64x8::from_array(*w_re);
let tw_im = f64x8::from_array(*w_im);

re_s0.copy_from_slice((real_c0 + real_c1).as_array());
im_s0.copy_from_slice((imag_c0 + imag_c1).as_array());
Expand Down Expand Up @@ -87,15 +87,15 @@ pub(crate) fn fft_chunk_n(

/// `chunk_size == 4`, so hard code twiddle factors
pub(crate) fn fft_chunk_4(reals: &mut [Float], imags: &mut [Float]) {
let dist = 2;
let chunk_size = dist << 1;
const DIST: usize = 2;
const CHUNK_SIZE: usize = DIST << 1;

reals
.chunks_exact_mut(chunk_size)
.zip(imags.chunks_exact_mut(chunk_size))
.array_chunks_mut::<CHUNK_SIZE>()
.zip(imags.array_chunks_mut::<CHUNK_SIZE>())
.for_each(|(reals_chunk, imags_chunk)| {
let (reals_s0, reals_s1) = reals_chunk.split_at_mut(dist);
let (imags_s0, imags_s1) = imags_chunk.split_at_mut(dist);
let (reals_s0, reals_s1) = reals_chunk.split_at_mut(DIST);
let (imags_s0, imags_s1) = imags_chunk.split_at_mut(DIST);

let real_c0 = reals_s0[0];
let real_c1 = reals_s1[0];
Expand All @@ -122,17 +122,14 @@ pub(crate) fn fft_chunk_4(reals: &mut [Float], imags: &mut [Float]) {
/// `chunk_size == 2`, so skip phase
pub(crate) fn fft_chunk_2(reals: &mut [Float], imags: &mut [Float]) {
reals
.chunks_exact_mut(2)
.zip(imags.chunks_exact_mut(2))
.for_each(|(reals_chunk, imags_chunk)| {
let z0_re = reals_chunk[0];
let z0_im = imags_chunk[0];
let z1_re = reals_chunk[1];
let z1_im = imags_chunk[1];

reals_chunk[0] = z0_re + z1_re;
imags_chunk[0] = z0_im + z1_im;
reals_chunk[1] = z0_re - z1_re;
imags_chunk[1] = z0_im - z1_im;
});
.array_chunks_mut::<2>()
.zip(imags.array_chunks_mut::<2>())
.for_each(
|(reals_chunk @ &mut [z0_re, z1_re], imags_chunk @ &mut [z0_im, z1_im])| {
reals_chunk[0] = z0_re + z1_re;
imags_chunk[0] = z0_im + z1_im;
reals_chunk[1] = z0_re - z1_re;
imags_chunk[1] = z0_im - z1_im;
},
);
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#![warn(clippy::suspicious)]
#![warn(clippy::perf)]
#![forbid(unsafe_code)]
#![feature(portable_simd)]
#![feature(portable_simd, array_chunks)]

use crate::cobra::cobra_apply;
use crate::kernels::{fft_chunk_2, fft_chunk_4, fft_chunk_n, fft_chunk_n_simd, Float};
Expand Down