diff --git a/src/TransposeUtils.cc b/src/TransposeUtils.cc index aecec554da..c889b907f5 100644 --- a/src/TransposeUtils.cc +++ b/src/TransposeUtils.cc @@ -57,7 +57,7 @@ void transpose_simd( #else static const auto iset = fbgemmInstructionSet(); // Run time CPU detection -#if defined(FBGEMM_FBCODE) || !defined(__aarch64__) +#if defined(FBGEMM_FBCODE) if (isZmm(iset)) { internal::transpose_avx512(M, N, src, ld_src, dst, ld_dst); } else if (isYmm(iset)) { diff --git a/src/UtilsAvx2.cc b/src/UtilsAvx2.cc index d9c4d33be6..317d021d6c 100644 --- a/src/UtilsAvx2.cc +++ b/src/UtilsAvx2.cc @@ -15,6 +15,8 @@ namespace fbgemm::internal { +#ifdef __AVX2__ + template <> void transpose_avx2( int64_t M, @@ -336,4 +338,6 @@ void transpose_avx2( } } +#endif // __AVX2__ + } // namespace fbgemm::internal diff --git a/src/UtilsAvx512.cc b/src/UtilsAvx512.cc index a2bcc276f2..619dbc35db 100644 --- a/src/UtilsAvx512.cc +++ b/src/UtilsAvx512.cc @@ -16,6 +16,8 @@ namespace fbgemm { namespace { +#ifdef __AVX512F__ + // 16 * 6 = 96 instructions inline void transpose_kernel_16x16_avx512( const float* src, @@ -2441,6 +2443,8 @@ void transpose_avx512( } } +#endif // __AVX512F__ + } // namespace internal } // namespace fbgemm