
Commit 976a634

feat: enable F32 output in CpuGemmConv2d
- Updated the convolution reference to branch the epilogue:
  * TO == float: int32-to-float dequant (acc * sA * sB + bias_f32)
  * TO != float: the usual quantize_down_scale_by_fixedpoint with an int32 bias
- Changed the fixture to use an F32 bias tensor for Q->F32 runs (instead of S32), matching the arm_gemm dequant epilogue, which only supports a float bias.
- Added explicit template instantiations of convolution_layer with TBias=float, TO=float to fix linker errors in validation.
- Disabled activation in the arm_gemm dequant path: offsets are applied afterwards by CpuGemmLowpOffsetContributionKernel, so the activation must run there to see the correct final accumulator.
- In src/cpu/kernels/gemmlowp/generic/neon/impl.h, neon_run_offset_contribution_float(): changed the per-batch offset for vector_sum_col from the Y stride to the W stride.

This aligns target and reference for the quantized-to-F32 convolution tests and prevents premature clamping before the offset contributions are applied.

Change-Id: I6fffc98dc0798542a2702e6a593b850c16561e3b
Signed-off-by: Pablo Marquez Tello <[email protected]>
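As a rough illustration of the two reference epilogues described above, here is a minimal sketch. The helper names are hypothetical, and the Q->Q branch is written with a plain float rescale rather than the library's actual fixed-point quantize_down_scale_by_fixedpoint:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Q -> F32: dequantize the int32 accumulator and add a float bias.
    inline float epilogue_dequant_f32(int32_t acc, float scale_a, float scale_b, float bias_f32)
    {
        return static_cast<float>(acc) * scale_a * scale_b + bias_f32;
    }

    // Q -> Q: add the int32 bias, rescale to the output quantization and clamp.
    // (Simplified: the real reference uses fixed-point multipliers/shifts.)
    inline int8_t epilogue_requant_q8(int32_t acc, int32_t bias_s32, float effective_scale, int32_t out_offset)
    {
        const float   scaled = static_cast<float>(acc + bias_s32) * effective_scale + static_cast<float>(out_offset);
        const int32_t q      = static_cast<int32_t>(std::lround(scaled));
        return static_cast<int8_t>(std::clamp(q, int32_t{-128}, int32_t{127}));
    }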
1 parent 531a496 commit 976a634

9 files changed, +289 -104 lines


src/cpu/kernels/gemmlowp/generic/neon/impl.h
Lines changed: 5 additions & 4 deletions

@@ -66,8 +66,9 @@ void neon_run_offset_contribution_float(const Window &window,
     const int window_step_x = 16;

     // if vector_sum_col is nullptr then stride_y is 0, else get stride_y
-    const size_t sum_col_stride_y = (vector_sum_col != nullptr) ? (vector_sum_col->info()->strides_in_bytes().y()) : 0;
-    Iterator mm_result_it(mm_result, collapsed_window);
+    const size_t sum_col_stride_w = (vector_sum_col != nullptr) ? vector_sum_col->info()->strides_in_bytes()[3] : 0;
+
+    Iterator mm_result_it(mm_result, collapsed_window);

     if ((a_offset != 0) && (b_offset != 0) && (vector_sum_col != nullptr) && (vector_sum_row != nullptr)) // true, true
     {
@@ -96,7 +97,7 @@ void neon_run_offset_contribution_float(const Window &window,
             [&](const Coordinates &id)
             {
                 const int batch_id = id.z() / depth_input;
-                const size_t batch_offset_col = batch_id * sum_col_stride_y;
+                const size_t batch_offset_col = batch_id * sum_col_stride_w;
                 auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_offset_col +
                                                                             batch_id * vector_sum_col_batch_offset);
                 auto mm_result_ptr = reinterpret_cast<T *>(mm_result_it.ptr());
@@ -216,7 +217,7 @@ void neon_run_offset_contribution_float(const Window &window,
                 const int batch_id = id.z() / depth_input;
                 const size_t batch_offset_col =
                     batch_id *
-                    sum_col_stride_y; // Value to offset vector_sum_col_ptr to allow for iteration of y values in tensor
+                    sum_col_stride_w; // Value to offset vector_sum_col_ptr to allow for iteration of y values in tensor
                 auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_offset_col +
                                                                             batch_id * vector_sum_col_batch_offset);
                 auto mm_result_ptr = reinterpret_cast<T *>(mm_result_it.ptr());
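For context, the identity this offset-contribution step implements, in scalar form. The real kernel is vectorized, handles the a_offset/b_offset special cases separately, and in the float variant also folds in the sA * sB scaling; the function below is only an illustration:

    #include <cstdint>

    // sum_k (a[k] - a_off) * (b[k] - b_off)
    //   = acc - a_off * sum_col - b_off * sum_row + K * a_off * b_off
    // acc     : raw int32 dot product sum_k a[k] * b[k] from the matrix multiply
    // sum_col : column sum of B for this output column (an entry of vector_sum_col)
    // sum_row : row sum of A for this output row (an entry of vector_sum_row)
    // K       : reduction depth
    inline int32_t offset_contributed(int32_t acc, int32_t sum_col, int32_t sum_row,
                                      int32_t a_off, int32_t b_off, int32_t K)
    {
        return acc - a_off * sum_col - b_off * sum_row + K * a_off * b_off;
    }

The stride change above presumably reflects vector_sum_col holding one row of column sums per batch along the W dimension, so the per-batch step has to use the W stride rather than the Y stride.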

src/cpu/operators/CpuGemmConv2d.cpp
Lines changed: 59 additions & 16 deletions

@@ -287,12 +287,29 @@ void CpuGemmConv2d::configure_mm(const ITensorInfo *src,
     }

     GEMMLowpOutputStageInfo output_info;
-    output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
-    output_info.gemmlowp_offset = uoqinfo.offset;
-    output_info.gemmlowp_min_bound = min_activation;
-    output_info.gemmlowp_max_bound = max_activation;
-    output_info.is_quantized_per_channel = (tmp_weights.data_type() == DataType::QSYMM8_PER_CHANNEL);
-    quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info);
+
+    // F32 dequant path? (input quantized, output float)
+    const bool dequantize_f32 = (dst->data_type() == DataType::F32);
+
+    if (dequantize_f32)
+    {
+        // No requant stage; offsets are handled via offset-contribution on int32
+        output_info.type = GEMMLowpOutputStageType::NONE;
+        output_info.gemmlowp_offset = 0;
+        output_info.gemmlowp_min_bound = 0;
+        output_info.gemmlowp_max_bound = 0;
+        output_info.is_quantized_per_channel = false; // irrelevant when NONE
+    }
+    else
+    {
+        // Existing Q->Q path
+        output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+        output_info.gemmlowp_offset = uoqinfo.offset;
+        output_info.gemmlowp_min_bound = min_activation;
+        output_info.gemmlowp_max_bound = max_activation;
+        output_info.is_quantized_per_channel = (tmp_weights.data_type() == DataType::QSYMM8_PER_CHANNEL);
+        quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info);
+    }

     const GEMMInfo gemm_info =
         GEMMInfo(false /* is_a_reshaped */, false /* is_b_reshaped */, true /* reshape_b_only_on_first_run */,
@@ -367,14 +384,30 @@ Status CpuGemmConv2d::validate_mm(const ITensorInfo *src,
     {
         std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, uoqinfo);
     }
-
+    // F32 dequant path? (input quantized, output float)
+    const bool dequantize_f32 = (dst->data_type() == DataType::F32);
     GEMMLowpOutputStageInfo output_info;
-    output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
-    output_info.gemmlowp_offset = uoqinfo.offset;
-    output_info.gemmlowp_min_bound = min_activation;
-    output_info.gemmlowp_max_bound = max_activation;
-    output_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL);
-    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info));
+
+    if (dequantize_f32)
+    {
+        // No requant stage; offsets are handled via offset-contribution on int32
+        output_info.type = GEMMLowpOutputStageType::NONE;
+        output_info.gemmlowp_offset = 0;
+        output_info.gemmlowp_min_bound = 0;
+        output_info.gemmlowp_max_bound = 0;
+        output_info.is_quantized_per_channel = false; // irrelevant when NONE
+    }
+    else
+    {
+        // Existing Q->Q path
+        output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+        output_info.gemmlowp_offset = uoqinfo.offset;
+        output_info.gemmlowp_min_bound = min_activation;
+        output_info.gemmlowp_max_bound = max_activation;
+        output_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL);
+        ARM_COMPUTE_RETURN_ON_ERROR(
+            quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info));
+    }

     // Perform validation step on GEMMLowp
     std::unique_ptr<ITensorInfo> input_qa = src->clone();
@@ -504,9 +537,11 @@ void CpuGemmConv2d::configure(const ITensorInfo *src,
     }

     const unsigned int mat_weights_cols = weights->dimension(idx_kernels);
+    const bool dequantize_f32 = is_data_type_quantized(data_type) && dst->data_type() == DataType::F32;

     // Create temporary GEMM output tensor in case we cannot skip col2im
-    const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
+    const DataType output_data_type = data_type == DataType::BFLOAT16 || dequantize_f32 ? DataType::F32 : data_type;
+
     if (!_skip_col2im)
     {
         TensorShape shape_gemm;
@@ -725,7 +760,14 @@ Status CpuGemmConv2d::validate(const ITensorInfo *src,
     {
         if (is_quantized)
         {
-            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
+            if (data_type == DataType::QASYMM8_SIGNED && dst->data_type() == DataType::F32)
+            {
+                ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32);
+            }
+            else
+            {
+                ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
+            }
         }
         else if (is_bf16)
         {
@@ -776,8 +818,9 @@ Status CpuGemmConv2d::validate(const ITensorInfo *src,
         gemm_input_to_use = &im2col_reshaped_info;
     }

+    const bool dequantize_f32 = is_data_type_quantized(data_type) && dst->data_type() == DataType::F32;
     // Create temporary GEMM output tensor in case we cannot skip col2im
-    const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
+    const DataType output_data_type = data_type == DataType::BFLOAT16 || dequantize_f32 ? DataType::F32 : data_type;
     if (!skip_col2im)
     {
         TensorShape shape_gemm = gemm_input_to_use->tensor_shape();
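For orientation, a minimal usage sketch of the path this commit enables: QASYMM8_SIGNED input and weights, an F32 bias and an F32 destination. The shapes, quantization parameters and padding below are made up, and a real caller should query NEGEMMConvolutionLayer::validate() first to confirm the combination is supported on their build:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, weights, biases, dst;
        // 8x8x3 signed-quantized input, 16 3x3x3 kernels, float output of the same spatial size.
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, 0)));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32)); // float bias on the dequant path
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));

        NEGEMMConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1)); // stride 1, pad 1

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src, weights and biases ...
        conv.run();
        return 0;
    }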

src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp
Lines changed: 49 additions & 7 deletions

@@ -132,10 +132,17 @@ void CpuGemmLowpMatrixMultiplyCore::configure(
         _reshape_b_only_on_first_run;
     _gemm_info = gemm_info;

-    const ITensorInfo *a_to_use = a;
-
+    // F32 dequant path? (input quantized, output float)
+    const bool dequantize_f32 = a->data_type() == DataType::QASYMM8_SIGNED && dst->data_type() == DataType::F32;
+    const ITensorInfo *a_to_use = a;
     // Initialize assembly kernel meta-data
-    const cpu::AsmGemmInfo asm_info = init_assembly_metadata(gemm_info);
+    cpu::AsmGemmInfo asm_info = init_assembly_metadata(gemm_info);
+    if (dequantize_f32)
+    {
+        // We don't want arm_gemm to compute the activations because bias and offsets are added in ACL at a later step
+        // so we disable activation in arm_gemm and run it as a post op in ACL
+        asm_info.activation_info = arm_compute::ActivationLayerInfo();
+    }

     const int32_t offset_correction = 128;
     const DataType dt = DataType::QASYMM8_SIGNED;
@@ -151,7 +158,6 @@ void CpuGemmLowpMatrixMultiplyCore::configure(
     {
         _flip_signedness = true;
     }
-
     _asm_glue = std::make_unique<cpu::CpuGemmAssemblyDispatch>();

     // Convert to QASYMM8 -> QASYMM8_SIGNED and back
@@ -200,7 +206,7 @@ void CpuGemmLowpMatrixMultiplyCore::configure(
         case DataType::U8:
         case DataType::S8:
         {
-            if (is_data_type_quantized_asymmetric(a_to_use->data_type()) &&
+            if (dst->data_type() != DataType::F32 && is_data_type_quantized_asymmetric(a_to_use->data_type()) &&
                 info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
            {
                auto c_info_to_use = c == nullptr ? nullptr : c;
@@ -310,8 +316,9 @@ void CpuGemmLowpMatrixMultiplyCore::configure(
     }
     // Configure activation
     const ActivationLayerInfo &activation = gemm_info.activation_info();
-    _run_activation =
-        activation.enabled() && (!_assembly_path || !cpu::CpuGemmAssemblyDispatch::is_activation_supported(activation));
+
+    _run_activation = activation.enabled() && (dequantize_f32 || !_assembly_path ||
+                                               !cpu::CpuGemmAssemblyDispatch::is_activation_supported(activation));
     if (_run_activation)
     {
         _activation_func = std::make_unique<CpuActivation>();
@@ -488,6 +495,41 @@ Status CpuGemmLowpMatrixMultiplyCore::validate(const ITensorInfo *a,
         {
             ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1));
         }
+
+        // Q -> F32 path, we add offsets in ACL so we need to validate:
+        // CpuGemmLowpMatrixAReductionKernel
+        // CpuGemmLowpMatrixAReductionKernel
+        if (a->data_type() == DataType::QASYMM8_SIGNED && output->data_type() == DataType::F32)
+        {
+            TensorInfo info_vector_sum_col{};
+            TensorInfo info_vector_sum_row{};
+
+            const GEMMLowpReductionKernelInfo reduction_info(a_to_use->dimension(0), false, 0, false);
+
+            // Validate matrix B reduction kernel only if _a_offset is not equal to 0
+            if (a_offset_kernel_needed)
+            {
+                info_vector_sum_col = TensorInfo(compute_reductionA_shape(*b), 1, DataType::S32);
+
+                // Configure Matrix B reduction kernel
+                ARM_COMPUTE_RETURN_ON_ERROR(
+                    kernels::CpuGemmLowpMatrixBReductionKernel::validate(b, &info_vector_sum_col, reduction_info));
+            }
+
+            // Validate Matrix A reduction kernel only if _b_offset is not equal to 0
+            if (b_offset_kernel_needed)
+            {
+                info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32);
+
+                // Configure matrix A reduction kernel
+                ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuGemmLowpMatrixAReductionKernel::validate(
+                    a_to_use, &info_vector_sum_row, reduction_info));
+            }
+
+            ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuGemmLowpOffsetContributionKernel::validate(
+                output, a_offset_kernel_needed ? &info_vector_sum_col : nullptr,
+                b_offset_kernel_needed ? &info_vector_sum_row : nullptr, a_offset, b_offset));
+        }
     }
     else
     {
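Why the assembly-path activation has to be deferred in the dequant case: the raw arm_gemm accumulator is not yet the true value, so clamping it before the offset contribution gives a different result than clamping afterwards. A toy numeric illustration (the numbers are made up):

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const int acc_raw      = -40;  // raw int32 accumulator from arm_gemm (offsets not yet applied)
        const int contribution = 100;  // a_offset/b_offset contribution added later in ACL

        const int clamp_then_add = std::max(acc_raw, 0) + contribution;  // 100 -> wrong (premature ReLU-style clamp)
        const int add_then_clamp = std::max(acc_raw + contribution, 0);  // 60  -> correct
        std::printf("clamp-then-add = %d, add-then-clamp = %d\n", clamp_then_add, add_then_clamp);
        return 0;
    }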

tests/datasets/SmallConvolutionLayerDataset.h
Lines changed: 4 additions & 4 deletions

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2025 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -21,8 +21,8 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
-#ifndef ARM_COMPUTE_TEST_SMALL_CONVOLUTION_LAYER_DATASET
-#define ARM_COMPUTE_TEST_SMALL_CONVOLUTION_LAYER_DATASET
+#ifndef ACL_TESTS_DATASETS_SMALLCONVOLUTIONLAYERDATASET_H
+#define ACL_TESTS_DATASETS_SMALLCONVOLUTIONLAYERDATASET_H

 #include "tests/datasets/ConvolutionLayerDataset.h"

@@ -246,4 +246,4 @@ class SmallGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_SMALL_CONVOLUTION_LAYER_DATASET */
+#endif // ACL_TESTS_DATASETS_SMALLCONVOLUTIONLAYERDATASET_H

tests/validation/NEON/ConvolutionLayer.cpp
Lines changed: 20 additions & 0 deletions

@@ -1363,6 +1363,10 @@ template <typename T>
 using NEGEMMConvolutionLayerForUpdatedStaticQuantInfoAfterConfigureFixture = ConvolutionValidationForUpdatedStaticQuantInfoAfterConfigureFixture<Tensor, Accessor, NEGEMMConvolutionLayer, T>;
 template <typename T>
 using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>;
+template <typename T>
+using NEGEMMConvolutionLayerQuantizedF32OutputFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, false, float>;
+
+
 template <typename T>
 using NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;

@@ -1397,6 +1401,21 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerForUpdatedStaticQuantInfo
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
+
+FIXTURE_DATA_TEST_CASE(RunSmallDequantizeF32, NEGEMMConvolutionLayerQuantizedF32OutputFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                       framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(2.f / 255.f, 10) })),
+                       ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
+
+
+
+
 TEST_SUITE_END() // QASYMM8_SIGNED

 TEST_SUITE(QASYMM8)
@@ -1425,6 +1444,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
+
 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
