fix: Do not mutate shared _gemm_output_3d in CpuGemmConv2d::run()

morgolock · morgolock · commit a1d5f71259e4 · 2026-04-02T16:12:05.000+01:00
CpuGemmConv2d::run() was mutating the shared member _gemm_output_3d by
extending its padding before soft_init()/import_memory().

When the same operator instance is reused across runs, this can cause
later extend_padding() calls to fail. It is also unsafe when the operator
is used from multiple threads.

Use a local TensorInfo copy in run() for padding extension and
soft_init()/import_memory(), leaving _gemm_output_3d unchanged.

Added a new test: RepeatedRunDoesNotReuseImportedGemm3dTensorInfo.

Change-Id: I3e4e2d25cabf85724ecf126b1c93df6733ee7d48
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
diff --git a/src/cpu/operators/CpuGemmConv2d.cpp b/src/cpu/operators/CpuGemmConv2d.cpp
@@ -939,8 +939,10 @@ void CpuGemmConv2d::run(ITensorPack &tensors)
     // Handle the case where output has top/bottom padding
     const ITensor *out_to_use = out_has_padding ? gemm_output.get() : dst;
     Tensor         gemm3d;
-    _gemm_output_3d.extend_padding(out_to_use->info()->padding());
-    gemm3d.allocator()->soft_init(_gemm_output_3d);
+    TensorInfo     gemm3d_info(_gemm_output_3d);
+    gemm3d_info.set_is_resizable(true);
+    gemm3d_info.extend_padding(out_to_use->info()->padding());
+    gemm3d.allocator()->soft_init(gemm3d_info);
     gemm3d.allocator()->import_memory(out_to_use->buffer());
     auto gemm_output_to_use = gemm_output.get();
 
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -1437,7 +1437,69 @@ TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
     }
 }
 
-/** Test case for memory injection in @ref NEGEMMConvolutionLayer.
+/** Regression test for repeated runs of a single cpu::CpuGemmConv2d instance.
+ *
+ * Configure the operator once, then execute it twice with injected memory and identical constant inputs.
+ * The element comparison indexes the raw buffers linearly (NOTE(review): assumes unpadded outputs - confirm).
+ * Checks performed in order:
+ * - The first run does not throw
+ * - The second run does not throw
+ * - Both runs compute the same output (compared with exact float equality)
+ */
+TEST_CASE(RepeatedRunDoesNotReuseImportedGemm3dTensorInfo, framework::DatasetMode::ALL)
+{
+    auto        conv        = std::make_unique<cpu::CpuGemmConv2d>();
+    const auto  src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
+    const auto  weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
+    const auto  bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
+    auto        dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
+    const auto  conv_info   = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
+    WeightsInfo weights_info(false, 3U, 3U, 1U);
+    conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info);
+
+    auto src    = create_tensor<Tensor>(src_info);
+    auto weight = create_tensor<Tensor>(weight_info);
+    auto bias   = create_tensor<Tensor>(bias_info);
+    src.allocator()->allocate();
+    weight.allocator()->allocate();
+    bias.allocator()->allocate();
+
+    ITensorPack run_pack{
+        {TensorType::ACL_SRC_0, &src}, {TensorType::ACL_SRC_1, &weight}, {TensorType::ACL_SRC_2, &bias}};
+    ITensorPack prep_pack{{TensorType::ACL_SRC_1, &weight}, {TensorType::ACL_SRC_2, &bias}};
+
+    auto mg = MemoryGroup{};
+    auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
+
+    auto run_conv = [&](Tensor &dst) -> bool
+    {
+        run_pack.add_tensor(TensorType::ACL_DST, &dst); // destination for this run
+
+        library->fill_tensor_value(Accessor(src), 1.f); // inputs are refilled with the same constants on every run
+        library->fill_tensor_value(Accessor(weight), 2.f);
+        library->fill_tensor_value(Accessor(bias), 3.f);
+        conv->prepare(prep_pack); // invoked on every run, not only the first
+        conv->run(run_pack);
+        return true; // value is not inspected; the call sites only check that nothing throws
+    };
+
+    auto result_0 = create_tensor<Tensor>(dst_info);
+    auto result_1 = create_tensor<Tensor>(dst_info);
+    result_0.allocator()->allocate();
+    result_1.allocator()->allocate();
+
+    ARM_COMPUTE_EXPECT_NO_THROW(run_conv(result_0), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT_NO_THROW(run_conv(result_1), framework::LogLevel::ERRORS);
+
+    for (size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
+    {
+        ARM_COMPUTE_EXPECT(reinterpret_cast<float *>(result_0.buffer())[i] ==
+                               reinterpret_cast<float *>(result_1.buffer())[i],
+                           framework::LogLevel::ERRORS);
+    }
+}
+
+/** Test case for memory injection in NEGEMMConvolutionLayer.
  *
  * Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API.
  *