
Commit 8775379

gunes-arm authored and morgolock committed

fix: Handle padding updates after configure() in CpuActivation

Partially Resolves: ARMCL-1199
Signed-off-by: Gunes Bayir <[email protected]>
Change-Id: I0a978407419cac86eb4b0499aa0234b316cf26b6

1 parent 1a189fb · commit 8775379
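
Note on the scenario this commit targets: CpuActivationKernel previously computed its execution window once, inside configure(). If another layer later extends the padding of a shared tensor, that stored window (and potentially the selected micro-kernel) no longer matches the actual buffer layout at run time. A minimal repro sketch, assuming the public NEON runtime API; the shape, padding values, and RELU choice are illustrative, not taken from the commit:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(7U, 7U, 17U, 2U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(7U, 7U, 17U, 2U), 1, DataType::F32));

    NEActivationLayer act;
    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // Padding is extended *after* configure(), e.g. by a neighbouring kernel
    // that shares these tensors; the window computed in configure() is now stale.
    src.info()->extend_padding(PaddingSize(0, 1, 0, 0));
    dst.info()->extend_padding(PaddingSize(0, 1, 0, 0));

    src.allocator()->allocate();
    dst.allocator()->allocate();

    act.run(); // with this fix, prepare() detects the padding change on first run
    return 0;
}
```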

File tree

7 files changed, +216 -20 lines changed

src/cpu/kernels/CpuActivationKernel.cpp

Lines changed: 49 additions & 2 deletions

@@ -201,17 +201,19 @@ void init_lut(ActivationLayerInfo::ActivationFunction act_func,
 void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info)
 {
     ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivationKernel::configure");
-    ARM_COMPUTE_UNUSED(dst);
     ARM_COMPUTE_ERROR_ON_NULLPTR(src);
     ARM_COMPUTE_ERROR_THROW_ON(CpuActivationKernel::validate(src, dst, activation_info));
 
     heuristics::CpuActivationKernelHeuristics heuristics(src, dst, activation_info);
     _heuristics = std::move(heuristics);
 
-    if (dst != nullptr)
+    _src_padding = src->padding();
+    _inplace     = (dst == nullptr);
+    if (!_inplace)
     {
         // dst auto inizialitation if not yet initialized
         auto_init_if_empty(*dst, *src->clone());
+        _dst_padding = dst->padding();
     }
 
     const auto *uk = _heuristics.kernel();
@@ -234,6 +236,7 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac
         activation_info.setLookupTable256(tmp_lut);
     }
 
+    // Kernel specific logic should be mirrored in prepare()
     if (std::string(uk->name) == "sve_fp16_activation_lut")
     {
         // Create info using init list.
@@ -264,6 +267,50 @@ size_t CpuActivationKernel::get_mws(const CPUInfo &platform, size_t thread_count
     return _heuristics.mws();
 }
 
+void CpuActivationKernel::prepare(ITensorPack &tensors)
+{
+    const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC);
+    ITensor       *dst = tensors.get_tensor(TensorType::ACL_DST);
+
+    const ITensorInfo *src_info = src->info();
+    const ITensorInfo *dst_info = dst->info();
+
+    const bool src_padding_changed = (src_info->padding() != _src_padding);
+    const bool dst_padding_changed = (!_inplace && dst_info->padding() != _dst_padding);
+
+    if (src_padding_changed || dst_padding_changed)
+    {
+        // If padding has changed after configuration, recalculate the heuristics
+        const auto kernel_before_padding_change = _heuristics.kernel();
+        heuristics::CpuActivationKernelHeuristics heuristics(src_info, dst_info, _act_info);
+        _heuristics = std::move(heuristics);
+        const auto kernel_after_padding_change = _heuristics.kernel();
+
+        if (kernel_before_padding_change != kernel_after_padding_change)
+        {
+            // Kernel specific logic in configure must be repeated
+            const auto *uk = _heuristics.kernel();
+            ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
+
+            _name = std::string("CpuActivationKernel").append("/").append(uk->name);
+
+#ifdef __aarch64__
+            if (std::string(uk->name) == "sve_fp16_activation_lut")
+            {
+                // Create info using init list.
+                LUTManager   &lut_manager = LUTManager::get_instance();
+                const LUTInfo info = {_act_info.activation(), _act_info.a(), _act_info.b(), src_info->data_type(),
+                                      src_info->quantization_info().uniform()};
+                _act_info.setLookupTable65536((lut_manager.get_lut_table<LookupTable65536>(info)));
+            }
+#endif // __aarch64__
+        }
+
+        // Re-register the window
+        ICPPKernel::configure(_heuristics.window());
+    }
+}
+
 void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivationKernel::run_op");
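
Why prepare() re-derives the heuristics rather than simply re-registering the old window: the window comes from calculate_squashed_or_max_window(), and squashing (collapsing a dense tensor into fewer, longer loop dimensions) is only valid while the buffer is contiguous. A simplified illustration of that dependency; this is a toy model, not ACL's implementation:

```cpp
#include <cstddef>

// With no padding between rows, a whole plane can be walked as one contiguous
// run; once right-padding is added, iteration must stop at each row boundary,
// so a previously "squashed" 1D window becomes invalid.
std::size_t contiguous_run_length(std::size_t width, std::size_t height, std::size_t right_padding)
{
    return (right_padding == 0) ? width * height // squash rows into one 1D run
                                : width;         // padded: one row at a time
}
```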

src/cpu/kernels/CpuActivationKernel.h

Lines changed: 12 additions & 1 deletion

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2024 Arm Limited.
+ * Copyright (c) 2017-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,6 +24,7 @@
 #ifndef ACL_SRC_CPU_KERNELS_CPUACTIVATIONKERNEL_H
 #define ACL_SRC_CPU_KERNELS_CPUACTIVATIONKERNEL_H
 
+#include "arm_compute/core/Types.h"
 #include "arm_compute/function_info/ActivationLayerInfo.h"
 
 #include "src/core/common/Macros.h"
@@ -86,10 +87,20 @@ class CpuActivationKernel : public ICPPKernel
         return _heuristics.scheduler_hint().split_dimension();
     }
 
+    /** Prepare the activation kernel for execution (Only executed once)
+     *
+     * @param[in] tensors Pack of input and output tensors
+     *
+     */
+    void prepare(ITensorPack &tensors);
+
 private:
     ActivationLayerInfo                       _act_info{};
     std::string                               _name{};
     heuristics::CpuActivationKernelHeuristics _heuristics{};
+    PaddingSize                               _src_padding{};
+    PaddingSize                               _dst_padding{};
+    bool                                      _inplace{};
 };
 } // namespace kernels
 } // namespace cpu

src/cpu/kernels/activation/heuristics/CpuActivationKernelHeuristics.cpp

Lines changed: 3 additions & 4 deletions

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2024 Arm Limited.
+ * Copyright (c) 2017-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -318,8 +318,6 @@ CpuActivationKernelHeuristics::CpuActivationKernelHeuristics(const ITensorInfo
                                                              const ITensorInfo         *dst,
                                                              const ActivationLayerInfo &activation_info)
 {
-    ARM_COMPUTE_UNUSED(dst);
-
     // Set kernel
     const DataType dtype = src->data_type();
     ActivationDataTypeISASelectorData selector{dtype, CPUInfo::get().get_cpu_model(), CPUInfo::get().get_isa(),
@@ -329,7 +327,8 @@ CpuActivationKernelHeuristics::CpuActivationKernelHeuristics(const ITensorInfo
 
     // Set window and scheduling hint
     int split_dim;
-    std::tie(_window, split_dim) = calculate_squashed_or_max_window(*src);
+    std::tie(_window, split_dim) =
+        dst == nullptr ? calculate_squashed_or_max_window(*src) : calculate_squashed_or_max_window(*src, *dst);
 
     // Collapse window with SME kernels in Y-Dim
     if (std::string(_kernel->name) == "sme2_fp32_logistic")
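
With this change the window is derived from src alone only for the in-place case (dst == nullptr); otherwise it must be valid for both tensors, whose paddings can differ. A toy sketch of the underlying rule, with assumed semantics rather than ACL's real window logic:

```cpp
// Squashing is only legal when every tensor involved is unpadded: a padded dst
// forbids it even if src is dense, and vice versa.
bool can_squash_window(bool src_has_padding, bool in_place, bool dst_has_padding)
{
    const bool dst_ok = in_place || !dst_has_padding;
    return !src_has_padding && dst_ok;
}
```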

src/cpu/operators/CpuActivation.cpp

Lines changed: 12 additions & 2 deletions

@@ -40,7 +40,9 @@ void CpuActivation::configure(const ITensorInfo *input, ITensorInfo *output, con
 {
     ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivation::configure");
     ARM_COMPUTE_LOG_PARAMS(input, output, activation_info);
-    auto k = std::make_unique<kernels::CpuActivationKernel>();
+
+    _is_prepared = false;
+    auto k       = std::make_unique<kernels::CpuActivationKernel>();
     k->configure(input, output, activation_info);
     _kernel = std::move(k);
 }
@@ -56,7 +58,15 @@ void CpuActivation::run(ITensorPack &tensors)
 {
     ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivation::run");
     ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
-    auto split_dimension = static_cast<kernels::CpuActivationKernel *>(_kernel.get())->get_split_dimension_hint();
+
+    auto kernel_casted = static_cast<kernels::CpuActivationKernel *>(_kernel.get());
+    if (!_is_prepared)
+    {
+        kernel_casted->prepare(tensors);
+        _is_prepared = true;
+    }
+
+    const size_t split_dimension = kernel_casted->get_split_dimension_hint();
     NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors);
 }
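
The run() change is a lazy, once-per-configure prepare step: validation against final tensor metadata happens on the first run() only, and configure() resets the latch so a reconfigured operator prepares again. A minimal generic sketch of the idiom, with names assumed and not tied to ACL types:

```cpp
class LazilyPreparedOp
{
public:
    void configure()
    {
        _is_prepared = false; // any reconfigure forces a fresh prepare on next run
        // ... record configure-time tensor metadata here ...
    }

    void run()
    {
        if (!_is_prepared)
        {
            prepare();           // re-check metadata (e.g. padding) exactly once
            _is_prepared = true; // later runs skip straight to scheduling
        }
        // ... schedule the kernel ...
    }

private:
    void prepare()
    {
        // Recompute whatever configure()-time state may have gone stale.
    }

    bool _is_prepared{false};
};
```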

src/cpu/operators/CpuActivation.h

Lines changed: 7 additions & 4 deletions

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023 Arm Limited.
+ * Copyright (c) 2021-2023, 2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_ACTIVATION_H
-#define ARM_COMPUTE_CPU_ACTIVATION_H
+#ifndef ACL_SRC_CPU_OPERATORS_CPUACTIVATION_H
+#define ACL_SRC_CPU_OPERATORS_CPUACTIVATION_H
 
 #include "arm_compute/function_info/ActivationLayerInfo.h"
 
@@ -53,7 +53,10 @@ class CpuActivation : public ICpuOperator
 
     // Inherited methods overridden:
     void run(ITensorPack &tensors) override;
+
+private:
+    bool _is_prepared{};
 };
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_ACTIVATION_H */
+#endif // ACL_SRC_CPU_OPERATORS_CPUACTIVATION_H

tests/validation/NEON/ActivationLayer.cpp

Lines changed: 96 additions & 4 deletions

@@ -50,6 +50,7 @@ namespace test
 {
 namespace validation
 {
+using framework::dataset::make;
 namespace
 {
 
@@ -62,6 +63,11 @@ const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions
 
 /** Input data sets. */
 const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), NeonActivationFunctionsDataset), framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
+const auto ActivationDatasetForPaddingAfterConfigure = combine(
+    make("InPlace", { false, true }),
+    NeonActivationFunctionsDataset,
+    make("AlphaBeta", { 0.5f })
+);
 
 template <typename T, ARM_COMPUTE_REQUIRES_TA(arm_compute::utils::traits::is_floating_point<T>::value)>
 void test_float_sqrt_boundary_value()
@@ -181,6 +187,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
 
 template <typename T>
 using NEActivationLayerFixture = ActivationValidationFixture<Tensor, Accessor, NEActivationLayer, T>;
+template <typename T>
+using NEActivationLayerWithPaddingFixture = ActivationWithPaddingValidationFixture<Tensor, Accessor, NEActivationLayer, T>;
 
 TEST_SUITE(Float)
 #ifdef ARM_COMPUTE_ENABLE_FP16
@@ -204,6 +212,25 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<half>, framework::Data
         framework::ARM_COMPUTE_PRINT_INFO();
     }
 }
+
+FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingFixture<half>, framework::DatasetMode::ALL,
+    combine(
+        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
+        ActivationDatasetForPaddingAfterConfigure,
+        make("DataType", DataType::F16))
+)
+{
+    if(CPUInfo::get().has_fp16())
+    {
+        // Validate output
+        validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED.");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
 TEST_SUITE_END() // FP16
 #endif /* ARM_COMPUTE_ENABLE_FP16 */
 
@@ -212,28 +239,45 @@ TEST_CASE(SqrtBoundaryValue, framework::DatasetMode::ALL)
 {
     test_float_sqrt_boundary_value<float>();
 }
-FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ActivationDataset), make("DataType",
                                                                                                        DataType::F32)))
 
 {
     // Validate output
     validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
 }
+
+FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingFixture<float>, framework::DatasetMode::ALL,
+    combine(
+        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
+        ActivationDatasetForPaddingAfterConfigure,
+        make("DataType", DataType::F32))
+)
+{
+    validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
+}
 // Run only on SME Devices to stress Logistic SME kernel
 #ifdef ARM_COMPUTE_ENABLE_SME2
 TEST_SUITE(SME)
-const auto LogsisticDataset = combine(framework::dataset::make("InPlace", { false }), framework::dataset::make("Function", ActivationLayerInfo::ActivationFunction::LOGISTIC), framework::dataset::make("AlphaBeta", { 1.f }));
-FIXTURE_DATA_TEST_CASE(RunLogistic5D, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::Tiny5dShapes(), LogsisticDataset, framework::dataset::make("DataType",
+const auto LogisticDataset = combine(make("InPlace", { false }), make("Function", ActivationLayerInfo::ActivationFunction::LOGISTIC), make("AlphaBeta", { 1.f }));
+FIXTURE_DATA_TEST_CASE(RunLogistic5D, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::Tiny5dShapes(), LogisticDataset, make("DataType",
                                                                                                        DataType::F32)))
 
 {
     // Validate output
     validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
 }
 
-FIXTURE_DATA_TEST_CASE(RunLogisticSME, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::LogisticSMEStressShapesFp32(), LogsisticDataset, framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLogisticSME, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::LogisticSMEStressShapesFp32(), LogisticDataset, make("DataType",
                                                                                                        DataType::F32)))
 
+{
+    // Validate output
+    validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
+}
+FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingFixture<float>, framework::DatasetMode::ALL,
+    combine(datasets::LogisticSMEStressShapesFp32(), LogisticDataset, make("DataType", DataType::F32)))
+
 {
     // Validate output
     validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
@@ -245,6 +289,8 @@ TEST_SUITE_END() // Float
 
 template <typename T>
 using NEActivationLayerQuantizedFixture = ActivationValidationQuantizedFixture<Tensor, Accessor, NEActivationLayer, T>;
+template <typename T>
+using NEActivationLayerWithPaddingQuantizedFixture = ActivationWithPaddingValidationQuantizedFixture<Tensor, Accessor, NEActivationLayer, T>;
 
 /** Input data sets. */
 const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationFunction",
@@ -263,6 +309,13 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
 const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }),
                                                 concat(QuantizedActivationFunctionsDataset, framework::dataset::make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH))),
                                                 framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
+const auto QuantizedActivationDatasetForPaddingAfterConfigure = combine(
+    make("InPlace", { false }),
+    concat(QuantizedActivationFunctionsDataset,
+        make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH)
+    ),
+    make("AlphaBeta", { 0.5f})
+);
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
@@ -274,6 +327,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<uint8_t>, fra
     // Validate output
     validate(Accessor(_target), _reference, helper::tolerance_qasymm8(_function));
 }
+FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+    combine(
+        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
+        QuantizedActivationDatasetForPaddingAfterConfigure,
+        make("DataType", DataType::QASYMM8),
+        make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) }
+    )))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, helper::tolerance_qasymm8(_function));
+}
 TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
@@ -285,6 +349,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<int8_t>, fram
     // Validate output
     validate(Accessor(_target), _reference, helper::tolerance_qasymm8(_function));
 }
+FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
+    combine(
+        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
+        QuantizedActivationDatasetForPaddingAfterConfigure,
+        make("DataType", DataType::QASYMM8_SIGNED),
+        make("QuantizationInfo", { QuantizationInfo(0.5f, 10.0f) }
+    )))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, helper::tolerance_qasymm8(_function));
+}
 TEST_SUITE_END() // QASYMM8_SIGNED
 
 /** Input data sets. */
@@ -297,6 +372,12 @@ const auto Int16QuantizedActivationFunctionsDataset = framework::dataset::make("
 const auto Int16QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }), Int16QuantizedActivationFunctionsDataset),
                                                      framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
 
+const auto Int16QuantizedActivationDatasetForPaddingAfterConfigure = combine(
+    make("InPlace", { false }),
+    Int16QuantizedActivationFunctionsDataset,
+    make("AlphaBeta", { 0.5f })
+);
+
 TEST_SUITE(QSYMM16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), Int16QuantizedActivationDataset),
                                                                                                                   framework::dataset::make("DataType",
@@ -306,6 +387,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<int16_t>, fra
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qsymm16);
 }
+FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingQuantizedFixture<int16_t>, framework::DatasetMode::ALL,
+    combine(
+        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
+        Int16QuantizedActivationDatasetForPaddingAfterConfigure,
+        make("DataType", DataType::QSYMM16),
+        make("QuantizationInfo", { QuantizationInfo(1.f / 32768.f, 0.f) }))
+)
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qsymm16);
+}
 TEST_SUITE_END() // QSYMM16
 TEST_SUITE_END() // Quantized
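
The PaddingAfterConfigure cases rely on ActivationWithPaddingValidationFixture and ActivationWithPaddingValidationQuantizedFixture, which are referenced here but not part of this diff. Presumably the fixture's target path configures the layer first and only then extends the tensor padding; a plausible core of such a fixture, with all names, includes, and padding values assumed:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"

using namespace arm_compute;

// Hypothetical sketch of the fixture's target computation (not from this diff).
template <typename TensorType, typename FunctionType>
void run_with_late_padding(TensorType &src, TensorType &dst, const TensorShape &shape, DataType dt, const ActivationLayerInfo &info)
{
    src.allocator()->init(TensorInfo(shape, 1, dt));
    dst.allocator()->init(TensorInfo(shape, 1, dt));

    FunctionType act;
    act.configure(&src, &dst, info);

    // The step under test: padding appears only after configure().
    src.info()->extend_padding(PaddingSize(0, 1, 1, 0));
    dst.info()->extend_padding(PaddingSize(0, 1, 1, 0));

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src with reference input, then:
    act.run(); // exercises CpuActivationKernel::prepare()'s padding check
}
```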
