diff --git a/src/kernels.rs b/src/kernels.rs index 67991da..fd71e9d 100644 --- a/src/kernels.rs +++ b/src/kernels.rs @@ -20,20 +20,20 @@ pub(crate) fn fft_chunk_n_simd( let (imags_s0, imags_s1) = imags_chunk.split_at_mut(dist); reals_s0 - .chunks_exact_mut(8) - .zip(reals_s1.chunks_exact_mut(8)) - .zip(imags_s0.chunks_exact_mut(8)) - .zip(imags_s1.chunks_exact_mut(8)) - .zip(twiddles_re.chunks_exact(8)) - .zip(twiddles_im.chunks_exact(8)) + .array_chunks_mut::<8>() + .zip(reals_s1.array_chunks_mut::<8>()) + .zip(imags_s0.array_chunks_mut::<8>()) + .zip(imags_s1.array_chunks_mut::<8>()) + .zip(twiddles_re.array_chunks::<8>()) + .zip(twiddles_im.array_chunks::<8>()) .for_each(|(((((re_s0, re_s1), im_s0), im_s1), w_re), w_im)| { - let real_c0 = f64x8::from_slice(re_s0); - let real_c1 = f64x8::from_slice(re_s1); - let imag_c0 = f64x8::from_slice(im_s0); - let imag_c1 = f64x8::from_slice(im_s1); + let real_c0 = f64x8::from_array(*re_s0); + let real_c1 = f64x8::from_array(*re_s1); + let imag_c0 = f64x8::from_array(*im_s0); + let imag_c1 = f64x8::from_array(*im_s1); - let tw_re = f64x8::from_slice(w_re); - let tw_im = f64x8::from_slice(w_im); + let tw_re = f64x8::from_array(*w_re); + let tw_im = f64x8::from_array(*w_im); re_s0.copy_from_slice((real_c0 + real_c1).as_array()); im_s0.copy_from_slice((imag_c0 + imag_c1).as_array()); @@ -87,15 +87,15 @@ pub(crate) fn fft_chunk_n( /// `chunk_size == 4`, so hard code twiddle factors pub(crate) fn fft_chunk_4(reals: &mut [Float], imags: &mut [Float]) { - let dist = 2; - let chunk_size = dist << 1; + const DIST: usize = 2; + const CHUNK_SIZE: usize = DIST << 1; reals - .chunks_exact_mut(chunk_size) - .zip(imags.chunks_exact_mut(chunk_size)) + .array_chunks_mut::<CHUNK_SIZE>() + .zip(imags.array_chunks_mut::<CHUNK_SIZE>()) .for_each(|(reals_chunk, imags_chunk)| { - let (reals_s0, reals_s1) = reals_chunk.split_at_mut(dist); - let (imags_s0, imags_s1) = imags_chunk.split_at_mut(dist); + let (reals_s0, reals_s1) = reals_chunk.split_at_mut(DIST); + let
(imags_s0, imags_s1) = imags_chunk.split_at_mut(DIST); let real_c0 = reals_s0[0]; let real_c1 = reals_s1[0]; @@ -122,17 +122,14 @@ pub(crate) fn fft_chunk_4(reals: &mut [Float], imags: &mut [Float]) { /// `chunk_size == 2`, so skip phase pub(crate) fn fft_chunk_2(reals: &mut [Float], imags: &mut [Float]) { reals - .chunks_exact_mut(2) - .zip(imags.chunks_exact_mut(2)) - .for_each(|(reals_chunk, imags_chunk)| { - let z0_re = reals_chunk[0]; - let z0_im = imags_chunk[0]; - let z1_re = reals_chunk[1]; - let z1_im = imags_chunk[1]; - - reals_chunk[0] = z0_re + z1_re; - imags_chunk[0] = z0_im + z1_im; - reals_chunk[1] = z0_re - z1_re; - imags_chunk[1] = z0_im - z1_im; - }); + .array_chunks_mut::<2>() + .zip(imags.array_chunks_mut::<2>()) + .for_each( + |(reals_chunk @ &mut [z0_re, z1_re], imags_chunk @ &mut [z0_im, z1_im])| { + reals_chunk[0] = z0_re + z1_re; + imags_chunk[0] = z0_im + z1_im; + reals_chunk[1] = z0_re - z1_re; + imags_chunk[1] = z0_im - z1_im; + }, + ); } diff --git a/src/lib.rs b/src/lib.rs index 4d33350..66fe8b4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,7 @@ #![warn(clippy::suspicious)] #![warn(clippy::perf)] #![forbid(unsafe_code)] -#![feature(portable_simd)] +#![feature(portable_simd, array_chunks)] use crate::cobra::cobra_apply; use crate::kernels::{fft_chunk_2, fft_chunk_4, fft_chunk_n, fft_chunk_n_simd, Float};