Skip to content

Commit a1d5f71

Browse files
committed
fix: Do not mutate shared _gemm_output_3d in CpuGemmConv2d::run()
CpuGemmConv2d::run() was mutating the shared member _gemm_output_3d by extending its padding before soft_init()/import_memory(). When the same operator instance is reused across runs, this can cause later extend_padding() calls to fail. It is also unsafe when the operator is used from multiple threads. Use a local TensorInfo copy in run() for padding extension and soft_init()/import_memory(), leaving _gemm_output_3d unchanged. Added a new test: RepeatedRunDoesNotReuseImportedGemm3dTensorInfo. Change-Id: I3e4e2d25cabf85724ecf126b1c93df6733ee7d48 Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
1 parent 7d0d25f commit a1d5f71

File tree

2 files changed

+67
-3
lines changed

2 files changed

+67
-3
lines changed

src/cpu/operators/CpuGemmConv2d.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -939,8 +939,10 @@ void CpuGemmConv2d::run(ITensorPack &tensors)
939939
// Handle the case where output has top/bottom padding
940940
const ITensor *out_to_use = out_has_padding ? gemm_output.get() : dst;
941941
Tensor gemm3d;
942-
_gemm_output_3d.extend_padding(out_to_use->info()->padding());
943-
gemm3d.allocator()->soft_init(_gemm_output_3d);
942+
TensorInfo gemm3d_info(_gemm_output_3d);
943+
gemm3d_info.set_is_resizable(true);
944+
gemm3d_info.extend_padding(out_to_use->info()->padding());
945+
gemm3d.allocator()->soft_init(gemm3d_info);
944946
gemm3d.allocator()->import_memory(out_to_use->buffer());
945947
auto gemm_output_to_use = gemm_output.get();
946948

tests/validation/NEON/ConvolutionLayer.cpp

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1437,7 +1437,69 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
14371437
}
14381438
}
14391439

1440-
/** Test case for memory injection in @ref NEGEMMConvolutionLayer.
1440+
/** Regression test for repeated runs in cpu::CpuGemmConv2d.
1441+
*
1442+
* Configure the operator once and execute it twice with injected memory.
1443+
*
1444+
* Checks performed in order:
1445+
* - The first run does not throw
1446+
* - The second run does not throw
1447+
* - Both runs compute the same output
1448+
*/
1449+
TEST_CASE(RepeatedRunDoesNotReuseImportedGemm3dTensorInfo, framework::DatasetMode::ALL)
1450+
{
1451+
auto conv = std::make_unique<cpu::CpuGemmConv2d>();
1452+
const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
1453+
const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
1454+
const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
1455+
auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
1456+
const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
1457+
WeightsInfo weights_info(false, 3U, 3U, 1U);
1458+
conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info);
1459+
1460+
auto src = create_tensor<Tensor>(src_info);
1461+
auto weight = create_tensor<Tensor>(weight_info);
1462+
auto bias = create_tensor<Tensor>(bias_info);
1463+
src.allocator()->allocate();
1464+
weight.allocator()->allocate();
1465+
bias.allocator()->allocate();
1466+
1467+
ITensorPack run_pack{
1468+
{TensorType::ACL_SRC_0, &src}, {TensorType::ACL_SRC_1, &weight}, {TensorType::ACL_SRC_2, &bias}};
1469+
ITensorPack prep_pack{{TensorType::ACL_SRC_1, &weight}, {TensorType::ACL_SRC_2, &bias}};
1470+
1471+
auto mg = MemoryGroup{};
1472+
auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
1473+
1474+
auto run_conv = [&](Tensor &dst) -> bool
1475+
{
1476+
run_pack.add_tensor(TensorType::ACL_DST, &dst);
1477+
1478+
library->fill_tensor_value(Accessor(src), 1.f);
1479+
library->fill_tensor_value(Accessor(weight), 2.f);
1480+
library->fill_tensor_value(Accessor(bias), 3.f);
1481+
conv->prepare(prep_pack);
1482+
conv->run(run_pack);
1483+
return true;
1484+
};
1485+
1486+
auto result_0 = create_tensor<Tensor>(dst_info);
1487+
auto result_1 = create_tensor<Tensor>(dst_info);
1488+
result_0.allocator()->allocate();
1489+
result_1.allocator()->allocate();
1490+
1491+
ARM_COMPUTE_EXPECT_NO_THROW(run_conv(result_0), framework::LogLevel::ERRORS);
1492+
ARM_COMPUTE_EXPECT_NO_THROW(run_conv(result_1), framework::LogLevel::ERRORS);
1493+
1494+
for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
1495+
{
1496+
ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] ==
1497+
reinterpret_cast<float *>(result_1.buffer())[i],
1498+
framework::LogLevel::ERRORS);
1499+
}
1500+
}
1501+
1502+
/** Test case for memory injection in NEGEMMConvolutionLayer.
14411503
*
14421504
* Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API.
14431505
*

0 commit comments

Comments
 (0)