
Commit 88c1594

feat: enable F32 output in CpuGemmConv2d
- Updated the convolution reference to branch its epilogue:
  * TO=float: int32 to float dequant (acc * sA * sB + bias_f32)
  * TO!=float: the usual quantize_down_scale_by_fixedpoint with an int32 bias
- Changed the fixture to use an F32 bias tensor for Q->F32 runs (instead of S32), matching the arm_gemm dequant epilogue, which only supports a float bias.
- Added explicit template instantiations of convolution_layer with TBias=float, TO=float to fix linker errors in validation.
- Disabled activation in the arm_gemm dequant path: offsets are applied afterwards by CpuGemmLowpOffsetContributionKernel, so activation must run there to see the correct final accumulator.
- src/cpu/kernels/gemmlowp/generic/neon/impl.h, neon_run_offset_contribution_float(): changed the per-batch offset for vector_sum_col from the Y stride to the W stride.

These changes align target and reference for the quantized-to-F32 convolution tests and prevent premature clamping before the offset contributions are applied.

Change-Id: I6fffc98dc0798542a2702e6a593b850c16561e3b
Signed-off-by: Pablo Marquez Tello <[email protected]>
1 parent c66c4d3 commit 88c1594
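The TO=float branch described in the commit message is a per-channel scale-and-add on the int32 accumulators. Below is a minimal sketch of that dequantizing epilogue, assuming the offset contributions are already folded into the accumulators and that weight scales are per output channel; the function name and signature are invented for illustration and this is not the library's reference implementation.

// Illustrative-only sketch of the TO=float epilogue: out = acc * sA * sB + bias_f32.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<float> dequant_epilogue(const std::vector<int32_t> &acc,      // int32 GEMM accumulators (one per OFM)
                                    float                       sA,       // input (activation) scale
                                    const std::vector<float>   &sB,       // per-channel weight scales
                                    const std::vector<float>   &bias_f32) // F32 bias, one per OFM
{
    std::vector<float> out(acc.size());
    for (std::size_t oc = 0; oc < acc.size(); ++oc)
    {
        // TO == float: dequantize instead of requantizing back down to int8
        out[oc] = static_cast<float>(acc[oc]) * sA * sB[oc] + bias_f32[oc];
    }
    return out;
}

For TO != float the reference keeps the existing quantize_down_scale_by_fixedpoint path with an S32 bias, as stated above.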

13 files changed, +402 -135 lines

arm_compute/runtime/NEON/functions/NEConvolutionLayer.h

Lines changed: 6 additions & 2 deletions
@@ -103,6 +103,7 @@ class NEConvolutionLayer : public IFunction
  * |QASYMM8 |QASYMM8_SIGNED |S32 |QASYMM8 |
  * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
  * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
  * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
  *
  * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -111,7 +112,8 @@ class NEConvolutionLayer : public IFunction
  * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
  * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL or QASYMM8_SIGNED if input is QASYMM8/QASYMM8_SIGNED.
  * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type and
+ * for F32 dequantization the bias must be F32.
  * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
  * Data types supported: Same as @p input.
  * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -140,8 +142,10 @@ class NEConvolutionLayer : public IFunction
  * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
  * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
  * Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL or QASYMM8_SIGNED if input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
  * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type and
+ * for F32 dequantization the bias must be F32.
  * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
  * Data types supported: Same as @p input.
  * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
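A hypothetical configuration exercising the new QASYMM8_SIGNED -> F32 row is sketched below; the shapes, quantization parameters and padding are arbitrary and the tensors are left unfilled. The relevant detail is that the bias tensor is F32 rather than S32 when the destination is F32.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, weights, biases, dst;

    // [width, height, IFM] = 32x32x3 input, 16 filters of 3x3x3, same padding
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.02f, 5)));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.003f, 0)));
    biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));      // F32 bias for the dequant path
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32)); // dequantized F32 output

    NEConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    conv.run(); // inputs are left unfilled in this sketch
    return 0;
}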

arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h

Lines changed: 3 additions & 0 deletions
@@ -78,6 +78,7 @@ class NEGEMMConvolutionLayer : public IFunction
  * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
  * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
  * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
  * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
  *
  * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -87,6 +88,7 @@ class NEGEMMConvolutionLayer : public IFunction
  * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
  * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
  * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * For F32 dequantization the bias must be F32.
  * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
  * Data types supported: Same as @p input.
  * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -117,6 +119,7 @@ class NEGEMMConvolutionLayer : public IFunction
  * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
  * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
  * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * For F32 dequantization the bias must be F32.
  * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
  * Data types supported: Same as @p input.
  * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.

arm_compute/runtime/experimental/operators/CpuGemmConv2d.h

Lines changed: 2 additions & 0 deletions
@@ -77,6 +77,7 @@ class CpuGemmConv2d : public IOperator
  * |QASYMM8 |QASYMM8_SIGNED |S32 |QASYMM8 |
  * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
  * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
  * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
  *
  * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
@@ -86,6 +87,7 @@ class CpuGemmConv2d : public IOperator
  * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
  * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
  * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * For F32 dequantization the bias must be F32.
  * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
  * Data types supported: Same as @p input.
  * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.

docs/user_guide/operator_list.dox

Lines changed: 2 additions & 0 deletions
@@ -610,6 +610,7 @@ where N = batches, C = channels, H = height, W = width, D = depth
     <tr><td>QASYMM8<td>QASYMM8_SIGNED<td>S32<td>QASYMM8
     <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
     <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>F32<td>F32
     <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
     </table>
 <tr>
@@ -1712,6 +1713,7 @@ where N = batches, C = channels, H = height, W = width, D = depth
     <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
     <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
     <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+    <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>F32<td>F32
     <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
     </table>
 <tr>

src/cpu/kernels/gemmlowp/generic/neon/impl.h

Lines changed: 5 additions & 4 deletions
@@ -66,8 +66,9 @@ void neon_run_offset_contribution_float(const Window &window,
     const int window_step_x = 16;
 
     // if vector_sum_col is nullptr then stride_y is 0, else get stride_y
-    const size_t sum_col_stride_y = (vector_sum_col != nullptr) ? (vector_sum_col->info()->strides_in_bytes().y()) : 0;
-    Iterator mm_result_it(mm_result, collapsed_window);
+    const size_t sum_col_stride_w = (vector_sum_col != nullptr) ? vector_sum_col->info()->strides_in_bytes()[3] : 0;
+
+    Iterator mm_result_it(mm_result, collapsed_window);
 
     if ((a_offset != 0) && (b_offset != 0) && (vector_sum_col != nullptr) && (vector_sum_row != nullptr)) // true, true
     {
@@ -96,7 +97,7 @@ void neon_run_offset_contribution_float(const Window &window,
         [&](const Coordinates &id)
         {
             const int batch_id = id.z() / depth_input;
-            const size_t batch_offset_col = batch_id * sum_col_stride_y;
+            const size_t batch_offset_col = batch_id * sum_col_stride_w;
             auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_offset_col +
                                                                         batch_id * vector_sum_col_batch_offset);
             auto mm_result_ptr = reinterpret_cast<T *>(mm_result_it.ptr());
@@ -216,7 +217,7 @@ void neon_run_offset_contribution_float(const Window &window,
             const int batch_id = id.z() / depth_input;
             const size_t batch_offset_col =
                 batch_id *
-                sum_col_stride_y; // Value to offset vector_sum_col_ptr to allow for iteration of y values in tensor
+                sum_col_stride_w; // Value to offset vector_sum_col_ptr to allow for iteration of w values in tensor
             auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_offset_col +
                                                                         batch_id * vector_sum_col_batch_offset);
             auto mm_result_ptr = reinterpret_cast<T *>(mm_result_it.ptr());
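For context on the stride swap: strides_in_bytes() holds one byte-stride per dimension, with .y() referring to dimension 1 and index 3 to the W dimension. Per the commit message, the per-batch copies of vector_sum_col live along W, so stepping to the next batch means adding the W stride. A hypothetical helper (not library code) mirroring the kernel's pointer arithmetic:

#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for the kernel's per-batch pointer computation. base_ptr is the
// start of the vector_sum_col buffer and sum_col_stride_w is the value of
// vector_sum_col->info()->strides_in_bytes()[3], i.e. the byte distance between
// consecutive batches of column sums.
inline const int32_t *sum_col_for_batch(const uint8_t *base_ptr, std::size_t batch_id, std::size_t sum_col_stride_w)
{
    return reinterpret_cast<const int32_t *>(base_ptr + batch_id * sum_col_stride_w);
}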

src/cpu/operators/CpuConv2d.h

Lines changed: 3 additions & 1 deletion
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2025 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -88,6 +88,7 @@ class CpuConv2d : public ICpuOperator
  * |QASYMM8 |QASYMM8_SIGNED |S32 |QASYMM8 |
  * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
  * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
  * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
  *
  * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
@@ -97,6 +98,7 @@ class CpuConv2d : public ICpuOperator
  * Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL or QASYMM8_SIGNED if input is QASYMM8/QASYMM8_SIGNED.
  * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
  * Data type supported: Same as @p src, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * For F32 dequantization the bias must be F32.
  * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
  * Data types supported: Same as @p src.
  * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.

src/cpu/operators/CpuGemmConv2d.cpp

Lines changed: 61 additions & 16 deletions
@@ -153,6 +153,10 @@ void initialize_reshaped_weight_info(const ITensorInfo &weights, ITensorInfo &re
         reshaped_weights.set_tensor_shape(collapsed_weights);
     }
 }
+inline bool int8_dequantize_f32_path(DataType src, DataType dst)
+{
+    return src == DataType::QASYMM8_SIGNED && dst == DataType::F32;
+}
 } // namespace
 
 CpuGemmConv2d::WeightTransformMethod CpuGemmConv2d::get_wt_method(const ITensorInfo &weights)
@@ -287,12 +291,27 @@ void CpuGemmConv2d::configure_mm(const ITensorInfo *src,
     }
 
     GEMMLowpOutputStageInfo output_info;
-    output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
-    output_info.gemmlowp_offset = uoqinfo.offset;
-    output_info.gemmlowp_min_bound = min_activation;
-    output_info.gemmlowp_max_bound = max_activation;
-    output_info.is_quantized_per_channel = (tmp_weights.data_type() == DataType::QSYMM8_PER_CHANNEL);
-    quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info);
+
+    // F32 dequant path? (input quantized, output float)
+    if (int8_dequantize_f32_path(data_type, dst->data_type()))
+    {
+        // No requant stage; offsets are handled via offset-contribution on int32
+        output_info.type = GEMMLowpOutputStageType::NONE;
+        output_info.gemmlowp_offset = 0;
+        output_info.gemmlowp_min_bound = 0;
+        output_info.gemmlowp_max_bound = 0;
+        output_info.is_quantized_per_channel = false; // irrelevant when NONE
+    }
+    else
+    {
+        // Existing Q->Q path
+        output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+        output_info.gemmlowp_offset = uoqinfo.offset;
+        output_info.gemmlowp_min_bound = min_activation;
+        output_info.gemmlowp_max_bound = max_activation;
+        output_info.is_quantized_per_channel = (tmp_weights.data_type() == DataType::QSYMM8_PER_CHANNEL);
+        quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info);
+    }
 
     const GEMMInfo gemm_info =
         GEMMInfo(false /* is_a_reshaped */, false /* is_b_reshaped */, true /* reshape_b_only_on_first_run */,
@@ -367,14 +386,28 @@ Status CpuGemmConv2d::validate_mm(const ITensorInfo *src,
     {
         std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, uoqinfo);
     }
-
+    // F32 dequant path? (input quantized, output float)
     GEMMLowpOutputStageInfo output_info;
-    output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
-    output_info.gemmlowp_offset = uoqinfo.offset;
-    output_info.gemmlowp_min_bound = min_activation;
-    output_info.gemmlowp_max_bound = max_activation;
-    output_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL);
-    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info));
+    if (int8_dequantize_f32_path(data_type, dst->data_type()))
+    {
+        // No requant stage; offsets are handled via offset-contribution on int32
+        output_info.type = GEMMLowpOutputStageType::NONE;
+        output_info.gemmlowp_offset = 0;
+        output_info.gemmlowp_min_bound = 0;
+        output_info.gemmlowp_max_bound = 0;
+        output_info.is_quantized_per_channel = false; // irrelevant when NONE
+    }
+    else
+    {
+        // Existing Q->Q path
+        output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+        output_info.gemmlowp_offset = uoqinfo.offset;
+        output_info.gemmlowp_min_bound = min_activation;
+        output_info.gemmlowp_max_bound = max_activation;
+        output_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL);
+        ARM_COMPUTE_RETURN_ON_ERROR(
+            quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info));
+    }
 
     // Perform validation step on GEMMLowp
     std::unique_ptr<ITensorInfo> input_qa = src->clone();
@@ -506,7 +539,10 @@ void CpuGemmConv2d::configure(const ITensorInfo *src,
     const unsigned int mat_weights_cols = weights->dimension(idx_kernels);
 
     // Create temporary GEMM output tensor in case we cannot skip col2im
-    const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
+    const DataType output_data_type =
+        data_type == DataType::BFLOAT16 || int8_dequantize_f32_path(data_type, dst->data_type()) ? DataType::F32
+                                                                                                  : data_type;
+
     if (!_skip_col2im)
     {
         TensorShape shape_gemm;
@@ -725,7 +761,14 @@ Status CpuGemmConv2d::validate(const ITensorInfo *src,
     {
         if (is_quantized)
         {
-            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
+            if (data_type == DataType::QASYMM8_SIGNED && dst->data_type() == DataType::F32)
+            {
+                ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32);
+            }
+            else
+            {
+                ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
+            }
         }
         else if (is_bf16)
         {
@@ -777,7 +820,9 @@ Status CpuGemmConv2d::validate(const ITensorInfo *src,
     }
 
     // Create temporary GEMM output tensor in case we cannot skip col2im
-    const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
+    const DataType output_data_type =
+        data_type == DataType::BFLOAT16 || int8_dequantize_f32_path(data_type, dst->data_type()) ? DataType::F32
+                                                                                                  : data_type;
     if (!skip_col2im)
     {
         TensorShape shape_gemm = gemm_input_to_use->tensor_shape();
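At the public API level, the validate() change above makes the bias data type part of the contract for the dequant path. Below is a hedged sketch of the expected behaviour; shapes and quantization values are invented, and the pass/fail outcomes describe what the change intends rather than output from the repository's tests.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    const TensorInfo src(TensorShape(32U, 32U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.02f, 5));
    const TensorInfo wei(TensorShape(3U, 3U, 3U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.003f, 0));
    const TensorInfo dst(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    const TensorInfo bias_f32(TensorShape(16U), 1, DataType::F32);
    const TensorInfo bias_s32(TensorShape(16U), 1, DataType::S32);

    // Expected to pass: F32 bias with the dequantized F32 output
    const Status ok = NEGEMMConvolutionLayer::validate(&src, &wei, &bias_f32, &dst, PadStrideInfo(1, 1, 1, 1));

    // Expected to fail: S32 bias is only accepted when the output stays quantized
    const Status bad = NEGEMMConvolutionLayer::validate(&src, &wei, &bias_s32, &dst, PadStrideInfo(1, 1, 1, 1));

    return (ok.error_code() == ErrorCode::OK && bad.error_code() != ErrorCode::OK) ? 0 : 1;
}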
