diff --git a/Android.bp b/Android.bp index 39ba145195..8a23f593dc 100644 --- a/Android.bp +++ b/Android.bp @@ -462,6 +462,7 @@ cc_library_static { "src/cpu/kernels/CpuScatterKernel.cpp", "src/cpu/kernels/CpuSoftmaxKernel.cpp", "src/cpu/kernels/CpuSubKernel.cpp", + "src/cpu/kernels/CpuTopKVKernel.cpp", "src/cpu/kernels/CpuTransposeKernel.cpp", "src/cpu/kernels/CpuWeightsReshapeKernel.cpp", "src/cpu/kernels/CpuWinogradConv2dKernel.cpp", @@ -611,6 +612,11 @@ cc_library_static { "src/cpu/kernels/sub/neon/qasymm8.cpp", "src/cpu/kernels/sub/neon/qasymm8_signed.cpp", "src/cpu/kernels/sub/neon/qsymm16.cpp", + "src/cpu/kernels/topkv/generic/neon/fp16.cpp", + "src/cpu/kernels/topkv/generic/neon/fp32.cpp", + "src/cpu/kernels/topkv/generic/neon/integer.cpp", + "src/cpu/kernels/topkv/generic/neon/qasymm8.cpp", + "src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp", "src/cpu/operators/CpuActivation.cpp", "src/cpu/operators/CpuAdd.cpp", "src/cpu/operators/CpuAddMulAdd.cpp", @@ -649,6 +655,7 @@ cc_library_static { "src/cpu/operators/CpuScatter.cpp", "src/cpu/operators/CpuSoftmax.cpp", "src/cpu/operators/CpuSub.cpp", + "src/cpu/operators/CpuTopKV.cpp", "src/cpu/operators/CpuTranspose.cpp", "src/cpu/operators/CpuWinogradConv2d.cpp", "src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp", @@ -978,6 +985,7 @@ cc_library_static { "src/runtime/NEON/functions/NEStackLayer.cpp", "src/runtime/NEON/functions/NEStridedSlice.cpp", "src/runtime/NEON/functions/NETile.cpp", + "src/runtime/NEON/functions/NETopKV.cpp", "src/runtime/NEON/functions/NETranspose.cpp", "src/runtime/NEON/functions/NEUnstack.cpp", "src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index 49a6e9fefb..82388da2d7 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2025 Arm Limited. + * Copyright (c) 2016-2026 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -112,6 +112,7 @@ #include "arm_compute/runtime/NEON/functions/NEStackLayer.h" #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/runtime/NEON/functions/NETile.h" +#include "arm_compute/runtime/NEON/functions/NETopKV.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/NEON/functions/NEUnstack.h" #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" diff --git a/arm_compute/runtime/NEON/functions/NETopKV.h b/arm_compute/runtime/NEON/functions/NETopKV.h new file mode 100644 index 0000000000..bdf9b10cd1 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NETopKV.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NETOPKV_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NETOPKV_H + +/** @file + * @publicapi + */ + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +// Forward declarations +class ITensor; +class ITensorInfo; + +/** Basic function to run cpu::kernels::CpuTopKVKernel + * + */ +class NETopKV : public IFunction +{ +public: + /** Constructor */ + NETopKV(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETopKV(const NETopKV &) = delete; + /** Default move constructor */ + NETopKV(NETopKV &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETopKV &operator=(const NETopKV &) = delete; + /** Default move assignment operator */ + NETopKV &operator=(NETopKV &&); + /** Destructor */ + ~NETopKV(); + /** Set the input and output of the kernel. + * + * @param[in] predictions A batch_size x classes tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED/S32 + * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: U32 + * @param[out] output Computed precision at @p k as a bool 1D tensor. Data types supported: U8 + * @param[in] k Number of top elements to look at for computing precision. + */ + void configure(const ITensor *predictions, const ITensor *targets, ITensor *output, const unsigned int k); + + /** Static function to check if given info will lead to a valid configuration. + * + * Similar to @ref NETopKV::configure() + * + * @return a status + */ + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; +}; +} // namespace arm_compute +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NETOPKV_H diff --git a/docs/user_guide/operator_list.dox b/docs/user_guide/operator_list.dox index c3a3ac59f4..0e4dc0ef64 100644 --- a/docs/user_guide/operator_list.dox +++ b/docs/user_guide/operator_list.dox @@ -3219,6 +3219,55 @@ where N = batches, C = channels, H = height, W = width, D = depth srcdst AllAll + + TopKV + Function to perform TopKV. + + + + NETopKV + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<tr><th>src0<th>src1<th>dst
<tr><td>QASYMM8<td>U32<td>U8
<tr><td>QASYMM8_SIGNED<td>U32<td>U8
<tr><td>S32<td>U32<td>U8
<tr><td>F16<td>U32<td>U8
<tr><td>F32<td>U32<td>U8
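The rows above list the data-type combinations supported by the new NEON function. As a quick illustration of how the public API documented in arm_compute/runtime/NEON/functions/NETopKV.h is expected to be wired up, here is a minimal usage sketch. It is not part of the patch: tensor names, shapes and the fill step are illustrative, and it assumes the [classes, batch] predictions layout described in the header.

    // Minimal usage sketch (not part of the patch) for the NETopKV function added above.
    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NETopKV.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void run_topkv_example()
    {
        constexpr unsigned int num_classes = 1000;
        constexpr unsigned int batch_size  = 32;
        constexpr unsigned int k           = 5;

        Tensor predictions, targets, hits;
        // predictions: [classes, batch]; F32 here, other types per the table above
        predictions.allocator()->init(TensorInfo(TensorShape(num_classes, batch_size), 1, DataType::F32));
        // targets: one U32 class id per batch element
        targets.allocator()->init(TensorInfo(TensorShape(batch_size), 1, DataType::U32));
        // hits: one U8 flag per batch element (1 if the target class is within the top-k scores)
        hits.allocator()->init(TensorInfo(TensorShape(batch_size), 1, DataType::U8));

        // Optionally validate the configuration before configuring the function
        ARM_COMPUTE_ERROR_THROW_ON(NETopKV::validate(predictions.info(), targets.info(), hits.info(), k));

        NETopKV topkv;
        topkv.configure(&predictions, &targets, &hits, k);

        predictions.allocator()->allocate();
        targets.allocator()->allocate();
        hits.allocator()->allocate();

        // ... fill predictions and targets ...

        topkv.run();
    }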
+ + Transpose Function to transpose a 2D tensor. diff --git a/filelist.json b/filelist.json index 9cf4b72eb1..1fea5a80e7 100644 --- a/filelist.json +++ b/filelist.json @@ -2453,6 +2453,27 @@ ] } }, + "TopKV": { + "files": { + "common": [ + "src/cpu/kernels/CpuTopKVKernel.cpp", + "src/cpu/operators/CpuTopKV.cpp", + "src/runtime/NEON/functions/NETopKV.cpp" + ], + "neon": { + "fp16": [ "src/cpu/kernels/topkv/generic/neon/fp16.cpp" ], + "fp32": [ "src/cpu/kernels/topkv/generic/neon/fp32.cpp" ], + "integer":["src/cpu/kernels/topkv/generic/neon/integer.cpp"], + "qasymm8": [ + "src/cpu/kernels/topkv/generic/neon/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp" + ] + } + + } + }, "Transpose": { "files": { "common": [ diff --git a/src/BUILD.bazel b/src/BUILD.bazel index 3c0b83369a..0c2a0cda99 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -738,6 +738,7 @@ filegroup( "cpu/kernels/CpuScatterKernel.cpp", "cpu/kernels/CpuSoftmaxKernel.cpp", "cpu/kernels/CpuSubKernel.cpp", + "cpu/kernels/CpuTopKVKernel.cpp", "cpu/kernels/CpuTransposeKernel.cpp", "cpu/kernels/CpuWeightsReshapeKernel.cpp", "cpu/kernels/CpuWinogradConv2dKernel.cpp", @@ -848,6 +849,10 @@ filegroup( "cpu/kernels/sub/neon/qasymm8.cpp", "cpu/kernels/sub/neon/qasymm8_signed.cpp", "cpu/kernels/sub/neon/qsymm16.cpp", + "cpu/kernels/topkv/generic/neon/fp32.cpp", + "cpu/kernels/topkv/generic/neon/integer.cpp", + "cpu/kernels/topkv/generic/neon/qasymm8.cpp", + "cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp", "cpu/operators/CpuActivation.cpp", "cpu/operators/CpuAdd.cpp", "cpu/operators/CpuAddMulAdd.cpp", @@ -886,6 +891,7 @@ filegroup( "cpu/operators/CpuScatter.cpp", "cpu/operators/CpuSoftmax.cpp", "cpu/operators/CpuSub.cpp", + "cpu/operators/CpuTopKV.cpp", "cpu/operators/CpuTranspose.cpp", "cpu/operators/CpuWinogradConv2d.cpp", "cpu/operators/internal/CpuGemmAssemblyDispatch.cpp", @@ -994,6 +1000,7 @@ filegroup( "runtime/NEON/functions/NEStackLayer.cpp", "runtime/NEON/functions/NEStridedSlice.cpp", "runtime/NEON/functions/NETile.cpp", + "runtime/NEON/functions/NETopKV.cpp", "runtime/NEON/functions/NETranspose.cpp", "runtime/NEON/functions/NEUnstack.cpp", "runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", @@ -1109,7 +1116,8 @@ filegroup( "cpu/kernels/scatter/generic/neon/fp16.cpp", "cpu/kernels/select/generic/neon/fp16.cpp", "cpu/kernels/softmax/generic/neon/fp16.cpp", - "cpu/kernels/sub/neon/fp16.cpp"] + + "cpu/kernels/sub/neon/fp16.cpp", + "cpu/kernels/topkv/generic/neon/fp16.cpp"] + glob(["**/*.h", "**/*.hpp", "**/*.inl"]), diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 74847f6878..bba1506afe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -732,6 +732,7 @@ target_sources( cpu/kernels/CpuScatterKernel.cpp cpu/kernels/CpuSoftmaxKernel.cpp cpu/kernels/CpuSubKernel.cpp + cpu/kernels/CpuTopKVKernel.cpp cpu/kernels/CpuTransposeKernel.cpp cpu/kernels/CpuWeightsReshapeKernel.cpp cpu/kernels/CpuWinogradConv2dKernel.cpp @@ -842,6 +843,10 @@ target_sources( cpu/kernels/sub/neon/qasymm8.cpp cpu/kernels/sub/neon/qasymm8_signed.cpp cpu/kernels/sub/neon/qsymm16.cpp + cpu/kernels/topkv/generic/neon/fp32.cpp + cpu/kernels/topkv/generic/neon/integer.cpp + cpu/kernels/topkv/generic/neon/qasymm8.cpp + cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp cpu/operators/CpuActivation.cpp cpu/operators/CpuAdd.cpp cpu/operators/CpuAddMulAdd.cpp @@ -880,6 +885,7 @@ target_sources( cpu/operators/CpuScatter.cpp cpu/operators/CpuSoftmax.cpp cpu/operators/CpuSub.cpp + 
cpu/operators/CpuTopKV.cpp cpu/operators/CpuTranspose.cpp cpu/operators/CpuWinogradConv2d.cpp cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -988,6 +994,7 @@ target_sources( runtime/NEON/functions/NEStackLayer.cpp runtime/NEON/functions/NEStridedSlice.cpp runtime/NEON/functions/NETile.cpp + runtime/NEON/functions/NETopKV.cpp runtime/NEON/functions/NETranspose.cpp runtime/NEON/functions/NEUnstack.cpp runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -1109,4 +1116,5 @@ target_sources( cpu/kernels/select/generic/neon/fp16.cpp cpu/kernels/softmax/generic/neon/fp16.cpp cpu/kernels/sub/neon/fp16.cpp + cpu/kernels/topkv/generic/neon/fp16.cpp ) \ No newline at end of file diff --git a/src/cpu/kernels/CpuTopKVKernel.cpp b/src/cpu/kernels/CpuTopKVKernel.cpp new file mode 100644 index 0000000000..e62f12d65c --- /dev/null +++ b/src/cpu/kernels/CpuTopKVKernel.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/cpu/kernels/CpuTopKVKernel.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" + +#include "src/common/utils/profile/acl_profile.h" +#include "src/core/common/Registrars.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/cpu/kernels/topkv/list.h" + +namespace arm_compute +{ +namespace cpu +{ + +namespace kernels +{ +namespace +{ + +static const std::vector available_kernels = { + {"neon_s32_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::S32); }, + REGISTER_FP32_NEON(arm_compute::cpu::topkv_s32_neon)}, + {"neon_fp32_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::F32); }, + REGISTER_FP32_NEON(arm_compute::cpu::topkv_fp32_neon)}, + {"neon_fp16_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::F16) && data.isa.fp16; }, + REGISTER_FP16_NEON(arm_compute::cpu::topkv_fp16_neon)}, + {"neon_qu8_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::QASYMM8); }, + REGISTER_QASYMM8_NEON(arm_compute::cpu::topkv_qasymm8_neon)}, + {"neon_qs8_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::QASYMM8_SIGNED); }, + REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::topkv_qasymm8_signed_neon)}}; + +Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, uint32_t k) +{ + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src0); + + // src0: predictions (logical shape [C, N], where N defaults to 1 if dimension 1 is absent) + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::S32, DataType::F16, DataType::F32); + + // src1: targets (class indices) + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src1, 1, DataType::U32); + + // predictions must at least have C + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src0.num_dimensions() < 1, "predictions must have at least 1 dimension (C)"); + + const unsigned int C = src0.tensor_shape()[0]; // classes + const unsigned int N = src0.tensor_shape()[1]; // batch (defaults to 1 if not present) + + // k constraints + ARM_COMPUTE_RETURN_ERROR_ON_MSG(k == 0, "k must be > 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(C == 0, "predictions classes dimension must be > 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(N == 0, "predictions batch dimension must be > 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(k > C, "k must be <= number of classes (C)"); + + // targets must match batch + // targets is expected to contain N elements (shape [N]) + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src1.num_dimensions() < 1, "targets must have at least 1 dimension"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src1.tensor_shape()[0] != N, + "targets dimension must match predictions batch dimension (N)"); + + // Output is one byte per batch element: shape [N] + const TensorShape out_shape(N); + + // If dst is already configured, validate it + if (dst.total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&dst, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst.tensor_shape() != out_shape, "dst shape must be [N]"); + } + + const auto uk = CpuTopKVKernel::get_implementation( + CpuTopKVKernelDataTypeISASelectorData{src0.data_type(), CPUInfo::get().get_isa()}); + + ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || 
uk->ukernel == nullptr); + + return Status{}; +} + +} // namespace + +void CpuTopKVKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, uint32_t k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKVKernel::configure"); + ARM_COMPUTE_UNUSED(src1); // compiler error on v7a + ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, k)); + + const auto uk = CpuTopKVKernel::get_implementation( + CpuTopKVKernelDataTypeISASelectorData{src0->data_type(), CPUInfo::get().get_isa()}); + + ARM_COMPUTE_ERROR_ON_NULLPTR(uk); + + _run_method = uk->ukernel; + _name = std::string("CpuTopKVKernel").append("/").append(uk->name); + _k = k; + // Auto initialize dst if not initialized + auto_init_if_empty(*dst, TensorShape(src0->dimension(1)), 1U, DataType::U8); + // Configure kernel window + Window win = calculate_max_window(*dst, Steps()); + ICpuKernel::configure(win); +} +size_t CpuTopKVKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(thread_count); + ARM_COMPUTE_UNUSED(platform); + + return 1024u; +} + +Status CpuTopKVKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, uint32_t k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKVKernel::validate"); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst); + + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst, k)); + + return Status{}; +} + +void CpuTopKVKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKVKernel::run_op"); + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); + + ARM_COMPUTE_ERROR_ON(tensors.empty()); + ARM_COMPUTE_ERROR_ON(_run_method == nullptr); + + const ITensor *predictions = tensors.get_const_tensor(TensorType::ACL_SRC_0); + const ITensor *targets = tensors.get_const_tensor(TensorType::ACL_SRC_1); + ITensor *output = tensors.get_tensor(TensorType::ACL_DST); + _run_method(predictions, targets, output, _k, window); +} + +const char *CpuTopKVKernel::name() const +{ + return _name.c_str(); +} + +const std::vector &CpuTopKVKernel::get_available_kernels() +{ + return available_kernels; +} + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/CpuTopKVKernel.h b/src/cpu/kernels/CpuTopKVKernel.h new file mode 100644 index 0000000000..c2085c2365 --- /dev/null +++ b/src/cpu/kernels/CpuTopKVKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_CPUTOPKVKERNEL_H +#define ACL_SRC_CPU_KERNELS_CPUTOPKVKERNEL_H + +#include "arm_compute/core/CPP/CPPTypes.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Window.h" + +#include "src/core/common/Macros.h" +#include "src/cpu/ICpuKernel.h" +#include "src/cpu/kernels/CpuKernelSelectionTypes.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ +using CpuTopKVKernelDataTypeISASelectorData = DataTypeISASelectorData; +using CpuTopKVKernelDataTypeISASelectorDataPtr = DataTypeISASelectorPtr; + +/** Interface for the kernel to perform addition between two tensors */ +class CpuTopKVKernel : public ICpuKernel +{ +private: + using TopKVKernelPtr = + std::add_pointer::type; + +public: + struct TopKVKernel + { + const char *name; + const CpuTopKVKernelDataTypeISASelectorDataPtr is_selected; + TopKVKernelPtr ukernel; + }; + + CpuTopKVKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuTopKVKernel); + /** Initialise the kernel's input, dst and border mode. + * + * @param[in] src0 First input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32 + * @param[in] src1 Second input tensor info. Data types supported: U32 + * @param[out] dst The dst tensor info. Data types supported: U8 + */ + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, uint32_t k); + /** Static function to check if given info will lead to a valid configuration + * + * Similar to CpuTopKVKernel::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, uint32_t k); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + const char *name() const override; + + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requsted configuration. + */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; + + static const std::vector &get_available_kernels(); + +private: + uint32_t _k{}; + TopKVKernelPtr _run_method{nullptr}; + std::string _name{}; +}; +} // namespace kernels +} // namespace cpu +} // namespace arm_compute +#endif // ACL_SRC_CPU_KERNELS_CPUTOPKVKERNEL_H diff --git a/src/cpu/kernels/topkv/generic/neon/fp16.cpp b/src/cpu/kernels/topkv/generic/neon/fp16.cpp new file mode 100644 index 0000000000..cf0dc460d3 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/fp16.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) + +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u32x4(uint32x4_t v) +{ +#if defined(__aarch64__) + return vaddvq_u32(v); +#else + uint32x2_t s = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + s = vpadd_u32(s, s); + return vget_lane_u32(s, 0); +#endif +} + +// Explicit specialization for float16_t +template <> +uint32_t count_gt_block(const float16_t *ptr, float16_t threshold) +{ + // Load 8 fp16 + const float16x8_t v16 = vld1q_f16(reinterpret_cast(ptr)); + + // Compare in fp32 (same correctness story you already debugged) + const float32x4_t thr = vdupq_n_f32(threshold); + + const float32x4_t v0 = vcvt_f32_f16(vget_low_f16(v16)); + const float32x4_t v1 = vcvt_f32_f16(vget_high_f16(v16)); + + const uint32x4_t b0 = vshrq_n_u32(vcgtq_f32(v0, thr), 31); + const uint32x4_t b1 = vshrq_n_u32(vcgtq_f32(v1, thr), 31); + + return reduce_u32x4(b0) + reduce_u32x4(b1); +} + +} // namespace detail + +void topkv_fp16_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +template void +detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/topkv/generic/neon/fp32.cpp b/src/cpu/kernels/topkv/generic/neon/fp32.cpp new file mode 100644 index 0000000000..ce946ac8a0 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/fp32.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/core/NEON/wrapper/wrapper.h" +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u32x4(uint32x4_t v) +{ +#if defined(__aarch64__) + return vaddvq_u32(v); +#else + uint32x2_t s = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + s = vpadd_u32(s, s); + return vget_lane_u32(s, 0); +#endif +} + +// Explicit specialization for float: may use float32x4_t etc (only in this TU) +template <> +uint32_t count_gt_block(const float *ptr, float threshold) +{ + using Tag = wrapper::traits::neon_bitvector_tag_t; + + const auto thr_vec = wrapper::vdup_n(threshold, Tag{}); + const auto v = wrapper::vloadq(ptr); + const auto mask = wrapper::vcgt(v, thr_vec); // underlying uint32x4_t + + const uint32x4_t m = mask; + const uint32x4_t b = vshrq_n_u32(m, 31); + return reduce_u32x4(b); +} + +} // namespace detail + +void topkv_fp32_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +// Force codegen of the template into this TU (optional but recommended to keep things tidy) +template void detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/generic/neon/impl.h b/src/cpu/kernels/topkv/generic/neon/impl.h new file mode 100644 index 0000000000..d4abca3605 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/impl.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_TOPKV_GENERIC_NEON_IMPL_H +#define ACL_SRC_CPU_KERNELS_TOPKV_GENERIC_NEON_IMPL_H + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Window.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +template +uint32_t count_gt_block(const ScalarType *ptr, ScalarType threshold); + +template +void topkv_neon_wrapper( + const ITensor *predictions, const ITensor *targets, ITensor *out, uint32_t k, const Window &window) +{ + const auto &pred_info = *predictions->info(); + const unsigned int C = pred_info.tensor_shape()[0]; + + ARM_COMPUTE_ERROR_ON(pred_info.strides_in_bytes()[0] != sizeof(ScalarType)); + + constexpr unsigned int vec_elems = 16 / sizeof(ScalarType); + + Window win = window; + + Iterator tgt_it(targets, win); + Iterator out_it(out, win); + + execute_window_loop( + win, + [&](const Coordinates &id) + { + const int n = id.x(); + const uint32_t t = *reinterpret_cast(tgt_it.ptr()); + + const ScalarType *base = + reinterpret_cast(predictions->ptr_to_element(Coordinates{0, n})); + + // integer/quant: compare against target value (+1 for strict > if you want eps-like behavior) + const ScalarType thr = base[t]; + + uint32_t rank = 0; + unsigned int c = 0; + + for (; c + vec_elems <= C; c += vec_elems) + { + rank += count_gt_block(base + c, thr); + } + + for (; c < C; ++c) + { + rank += (base[c] > thr) ? 1u : 0u; + } + + *reinterpret_cast(out_it.ptr()) = static_cast(rank < k); + }, + tgt_it, out_it); +} +} // namespace detail +} // namespace cpu +} // namespace arm_compute + +#endif // ACL_SRC_CPU_KERNELS_TOPKV_GENERIC_NEON_IMPL_H diff --git a/src/cpu/kernels/topkv/generic/neon/integer.cpp b/src/cpu/kernels/topkv/generic/neon/integer.cpp new file mode 100644 index 0000000000..b8dc523ab4 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/integer.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u32x4(uint32x4_t v) +{ +#if defined(__aarch64__) + return vaddvq_u32(v); +#else + uint32x2_t s = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + s = vpadd_u32(s, s); + return vget_lane_u32(s, 0); +#endif +} + +template <> +uint32_t count_gt_block(const int32_t *ptr, int32_t threshold) +{ + const int32x4_t v = vld1q_s32(ptr); + const int32x4_t thr = vdupq_n_s32(threshold); + const uint32x4_t m = vcgtq_s32(v, thr); // 0xFFFFFFFF / 0 per lane + const uint32x4_t b = vshrq_n_u32(m, 31); // 0/1 per lane + return reduce_u32x4(b); +} + +} // namespace detail + +void topkv_s32_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +template void +detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/generic/neon/qasymm8.cpp b/src/cpu/kernels/topkv/generic/neon/qasymm8.cpp new file mode 100644 index 0000000000..f777f991bc --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/qasymm8.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u8_to_count(uint8x16_t m) +{ +#if defined(__aarch64__) + // mask is 0xFF where true, 0 otherwise -> shift to 0/1 then sum + const uint8x16_t ones = vshrq_n_u8(m, 7); + return vaddvq_u8(ones); +#else + const uint8x16_t ones = vshrq_n_u8(m, 7); + uint16x8_t s16 = vpaddlq_u8(ones); + uint32x4_t s32 = vpaddlq_u16(s16); + uint64x2_t s64 = vpaddlq_u32(s32); + return static_cast(vgetq_lane_u64(s64, 0) + vgetq_lane_u64(s64, 1)); +#endif +} + +template <> +uint32_t count_gt_block(const uint8_t *ptr, uint8_t threshold) +{ + const uint8x16_t v = vld1q_u8(ptr); + const uint8x16_t thr = vdupq_n_u8(threshold); + const uint8x16_t m = vcgtq_u8(v, thr); // 0xFF / 0x00 bytes + return reduce_u8_to_count(m); +} + +} // namespace detail + +void topkv_qasymm8_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +template void +detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp b/src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp new file mode 100644 index 0000000000..0970fa4add --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u8_to_count(uint8x16_t m) +{ +#if defined(__aarch64__) + const uint8x16_t ones = vshrq_n_u8(m, 7); + return vaddvq_u8(ones); +#else + const uint8x16_t ones = vshrq_n_u8(m, 7); + uint16x8_t s16 = vpaddlq_u8(ones); + uint32x4_t s32 = vpaddlq_u16(s16); + uint64x2_t s64 = vpaddlq_u32(s32); + return static_cast(vgetq_lane_u64(s64, 0) + vgetq_lane_u64(s64, 1)); +#endif +} + +template <> +uint32_t count_gt_block(const int8_t *ptr, int8_t threshold) +{ + const int8x16_t v = vld1q_s8(ptr); + const int8x16_t thr = vdupq_n_s8(threshold); + const uint8x16_t m = vcgtq_s8(v, thr); // returns uint8x16_t mask + return reduce_u8_to_count(m); +} + +} // namespace detail + +void topkv_qasymm8_signed_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +template void detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/list.h b/src/cpu/kernels/topkv/list.h new file mode 100644 index 0000000000..3719c5eb42 --- /dev/null +++ b/src/cpu/kernels/topkv/list.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_TOPKV_LIST_H +#define ACL_SRC_CPU_KERNELS_TOPKV_LIST_H + +namespace arm_compute +{ +namespace cpu +{ +#define DECLARE_TOPKV_KERNEL(func_name) \ + void func_name(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) + +DECLARE_TOPKV_KERNEL(topkv_qasymm8_neon); +DECLARE_TOPKV_KERNEL(topkv_qasymm8_signed_neon); +DECLARE_TOPKV_KERNEL(topkv_fp16_neon); +DECLARE_TOPKV_KERNEL(topkv_fp32_neon); +DECLARE_TOPKV_KERNEL(topkv_s32_neon); + +#undef DECLARE_TOPKV_KERNEL +} // namespace cpu +} // namespace arm_compute + +#endif // ACL_SRC_CPU_KERNELS_TOPKV_LIST_H diff --git a/src/cpu/operators/CpuTopKV.cpp b/src/cpu/operators/CpuTopKV.cpp new file mode 100644 index 0000000000..46a5f97471 --- /dev/null +++ b/src/cpu/operators/CpuTopKV.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/cpu/operators/CpuTopKV.h" + +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include "src/common/IOperator.h" +#include "src/common/utils/Log.h" +#include "src/common/utils/profile/acl_profile.h" +#include "src/cpu/kernels/CpuTopKVKernel.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +void CpuTopKV::configure(const ITensorInfo *predictions, + const ITensorInfo *targets, + ITensorInfo *output, + const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKV::configure"); + ARM_COMPUTE_LOG_PARAMS(predictions, targets, output, k); + + auto kernel = std::make_unique(); + kernel->configure(predictions, targets, output, k); + _kernel = std::move(kernel); +} + +Status CpuTopKV::validate(const ITensorInfo *predictions, + const ITensorInfo *targets, + ITensorInfo *output, + const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKV::validate"); + return kernels::CpuTopKVKernel::validate(predictions, targets, output, k); +} + +void CpuTopKV::run(ITensorPack &tensors) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKV::run"); + ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); + + NEScheduler::get().schedule_op(_kernel.get(), Window::DimX, _kernel->window(), tensors); +} +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/operators/CpuTopKV.h b/src/cpu/operators/CpuTopKV.h new file mode 100644 index 0000000000..b0a81a028b --- /dev/null +++ b/src/cpu/operators/CpuTopKV.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_OPERATORS_CPUTOPKV_H +#define ACL_SRC_CPU_OPERATORS_CPUTOPKV_H + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" + +#include "src/cpu/ICpuOperator.h" + +namespace arm_compute +{ +namespace cpu +{ +/** Basic function to run @ref kernels::CpuTopKVKernel */ +class CpuTopKV : public ICpuOperator +{ +public: + /** Set the input and output of the kernel. + * + * @param[in] predictions A classes x batches tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED/S32 + * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: U32 + * @param[out] output Computed precision at @p k as a bool 1D tensor. Data types supported: U8 + * @param[in] k Number of top elements to look at for computing precision. + */ + void + configure(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + + /** Static function to check if given info will lead to a valid configuration. + * + * Similar to @ref CpuTopKV::configure() + * + * @return a status + */ + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; +}; +} // namespace cpu +} // namespace arm_compute +#endif // ACL_SRC_CPU_OPERATORS_CPUTOPKV_H diff --git a/src/runtime/NEON/functions/NETopKV.cpp b/src/runtime/NEON/functions/NETopKV.cpp new file mode 100644 index 0000000000..3e942591dc --- /dev/null +++ b/src/runtime/NEON/functions/NETopKV.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NETopKV.h" + +#include "arm_compute/core/Validate.h" + +#include "src/common/utils/Log.h" +#include "src/common/utils/profile/acl_profile.h" +#include "src/cpu/operators/CpuTopKV.h" + +namespace arm_compute +{ +struct NETopKV::Impl +{ + const ITensor *predictions{nullptr}; + const ITensor *targets{nullptr}; + ITensor *output{nullptr}; + std::unique_ptr op{nullptr}; +}; + +NETopKV::NETopKV() : _impl(std::make_unique()) +{ +} +NETopKV::NETopKV(NETopKV &&) = default; +NETopKV &NETopKV::operator=(NETopKV &&) = default; +NETopKV::~NETopKV() = default; + +void NETopKV::configure(const ITensor *predictions, const ITensor *targets, ITensor *output, const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NETopKV::configure"); + ARM_COMPUTE_LOG_PARAMS(predictions, targets, output, k); + + _impl->predictions = predictions; + _impl->targets = targets; + _impl->output = output; + + ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->predictions, _impl->targets, _impl->output); + + _impl->op = std::make_unique(); + _impl->op->configure(_impl->predictions->info(), _impl->targets->info(), _impl->output->info(), k); +} + +Status +NETopKV::validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NETopKV::validate"); + ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(predictions, targets, output); + return cpu::CpuTopKV::validate(predictions, targets, output, k); +} + +void NETopKV::run() +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NETopKV::run"); + ARM_COMPUTE_LOG_PARAMS(_impl->predictions, _impl->targets, _impl->output, k); + + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->predictions); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->targets); + pack.add_tensor(TensorType::ACL_DST, _impl->output); + _impl->op->run(pack); +} +} // namespace arm_compute diff --git a/tests/datasets/TopKVDataset.h b/tests/datasets/TopKVDataset.h new file mode 100644 index 0000000000..e96006ef80 --- /dev/null +++ b/tests/datasets/TopKVDataset.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_DATASETS_TOPKVDATASET_H +#define ACL_TESTS_DATASETS_TOPKVDATASET_H + +#include "arm_compute/core/TensorShape.h" + +#include "tests/framework/datasets/Datasets.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +/** Parent type for all for shape datasets. */ +using ShapeDataset = framework::dataset::ContainerDataset>; + +/** Data set containing small edge cases for TopKV operator. */ +class Small1DTopKV final : public ShapeDataset +{ +public: + Small1DTopKV() : ShapeDataset("Shape", {TensorShape{8U, 1U}, TensorShape{1U, 8U}}) + { + } +}; + +/** Data set containing small 2D tensor shapes for TopKV operator. */ +class SmallTopKV final : public ShapeDataset +{ +public: + SmallTopKV() + : ShapeDataset("Shape", + {TensorShape{8U, 1U}, TensorShape{8U, 7U}, TensorShape{15U, 13U}, TensorShape{32U, 64U}}) + { + } +}; + +/** Data set containing small 2D tensor shapes for TopKV operator. */ +class LargeTopKV final : public ShapeDataset +{ +public: + LargeTopKV() + : ShapeDataset("Shape", {TensorShape{1000U, 64U}, TensorShape{1500U, 128U}, TensorShape{1000U, 32000U}}) + { + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_DATASETS_TOPKVDATASET_H diff --git a/tests/validation/NEON/TopKV.cpp b/tests/validation/NEON/TopKV.cpp new file mode 100644 index 0000000000..e7887c7f2a --- /dev/null +++ b/tests/validation/NEON/TopKV.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/utils/misc/Traits.h" +#include "arm_compute/runtime/CPP/functions/CPPTopKV.h" +#include "arm_compute/runtime/NEON/functions/NETopKV.h" +#include "arm_compute/runtime/Tensor.h" + +#include "tests/datasets/TopKVDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/validation/fixtures/TopKVLayerFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; + +const auto tiny_dataset_topkv = combine(datasets::Small1DTopKV(), make("K", 1)); +const auto small_dataset_topkv = combine(datasets::SmallTopKV(), make("K", 3, 5)); +const auto large_dataset_topkv = combine(datasets::LargeTopKV(), make("K", 3, 5)); +const auto s32_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::S32)); +const auto s32_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::S32)); +const auto s32_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::S32)); +const auto f32_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::F32)); +const auto f32_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::F32)); +const auto f32_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::F32)); +const auto f16_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::F16)); +const auto f16_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::F16)); +const auto f16_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::F16)); +const auto qu8_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qu8_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qu8_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qs8_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::QASYMM8_SIGNED)); +const auto qs8_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qs8_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::QASYMM8_SIGNED)); + +constexpr AbsoluteTolerance ZeroTolerance{0}; + +TEST_SUITE(NEON) +TEST_SUITE(TopKVLayer) +// clang-format on +// *INDENT-ON* +template +using NETopKVFixture = TopKVValidationFixture; + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, qu8_tiny_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, qu8_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, qu8_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, qs8_tiny_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, qs8_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, 
framework::DatasetMode::NIGHTLY, qs8_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + +TEST_SUITE(Float) +#ifdef ARM_COMPUTE_ENABLE_FP16 +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, f16_tiny_dataset) +{ + if (CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); + } + else + { + ARM_COMPUTE_TEST_WARNING("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_WARNING(); + } +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, f16_small_dataset) +{ + if (CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); + } + else + { + ARM_COMPUTE_TEST_WARNING("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_WARNING(); + } +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, f16_large_dataset) +{ + if (CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); + } + else + { + ARM_COMPUTE_TEST_WARNING("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_WARNING(); + } +} +TEST_SUITE_END() // FP16 +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, f32_tiny_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, f32_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, f32_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, s32_tiny_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, s32_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, s32_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // S32 + +TEST_SUITE_END() // TopKVLayer +TEST_SUITE_END() // NEON + +TEST_SUITE(CPP) +TEST_SUITE(TopKVLayer) + +template +using CPPTopKVLayerFixture = TopKVValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +// Used only for benchmarking +FIXTURE_DATA_TEST_CASE(RunLarge, CPPTopKVLayerFixture, framework::DatasetMode::DISABLED, f32_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // TopKVLayer +TEST_SUITE_END() // NEON + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/TopKVLayerFixture.h b/tests/validation/fixtures/TopKVLayerFixture.h new file mode 100644 index 0000000000..1be7c38e1f --- /dev/null +++ b/tests/validation/fixtures/TopKVLayerFixture.h @@ -0,0 +1,265 @@ +/* + * 
+ * Copyright (c) 2026 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_TOPKVLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_TOPKVLAYERFIXTURE_H
+
+#include "tests/AssetsLibrary.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/Globals.h"
+#include "tests/validation/reference/TopKV.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <random>
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class TopKVValidationFixture : public framework::Fixture
+{
+public:
+    void setup(TensorShape predictions_shape, uint32_t k, DataType input_data_type)
+    {
+        if (std::is_same<TensorType, Tensor>::value && // Cpu
+            input_data_type == DataType::F16 && !CPUInfo::get().has_fp16())
+        {
+            return;
+        }
+        const TensorShape targets_shape(predictions_shape[1]);
+        _data_type = input_data_type;
+        _target    = compute_target(predictions_shape, targets_shape, k);
+        _reference = compute_reference(predictions_shape, targets_shape, k);
+    }
+
+protected:
+    // Fills prediction scores with small random noise, assigns a random target class to each sample,
+    // and slightly boosts the target score so some targets fall inside the top-K and others do not.
+    template <typename U, typename W>
+    void fill(U &&predictions, W &&targets, int32_t i, uint32_t C)
+    {
+        std::mt19937 gen(0);
+
+        // 1) Fill targets with valid class indices in [0, C-1]
+        std::uniform_int_distribution<uint32_t> class_dist(0, C - 1);
+        library->fill(targets, class_dist, i);
+
+        const unsigned int N = targets.shape()[0];
+
+        // 2) Float predictions (F32/F16): small noise + per-sample boost to target class
+        if (_data_type == DataType::F32 || _data_type == DataType::F16)
+        {
+            std::normal_distribution<float> noise(0.f, 0.1f);
+            library->fill(predictions, noise, i);
+
+            // Mixture of boosts so output is not always 1
+            std::uniform_real_distribution<float> u01(0.f, 1.f);
+            std::normal_distribution<float>       strong_boost(0.8f, 0.15f);
+            std::normal_distribution<float>       medium_boost(0.35f, 0.15f);
+            std::normal_distribution<float>       weak_boost(0.05f, 0.10f);
+
+            for (unsigned int n = 0; n < N; ++n)
+            {
+                const uint32_t target = *reinterpret_cast<const uint32_t *>(targets(Coordinates{static_cast<int>(n)}));
+
+                float       boost = 0.f;
+                const float r     = u01(gen);
+                if (r < 0.35f)
+                {
+                    boost = strong_boost(gen);
+                }
+                else if (r < 0.80f)
+                {
+                    boost = medium_boost(gen);
+                }
+                else
+                {
+                    boost = weak_boost(gen);
+                }
+                boost = std::max(-0.2f, std::min(boost, 1.5f));
+
+                const Coordinates pc{static_cast<int>(target), static_cast<int>(n)};
+
+                if (_data_type == DataType::F32)
+                {
+                    *reinterpret_cast<float *>(predictions(pc)) += boost;
+                }
+                else // F16 (half)
+                {
+                    auto *p = reinterpret_cast<half *>(predictions(pc));
+                    *p      = static_cast<half>(static_cast<float>(*p) + boost);
+                }
+            }
+            return;
+        }
+
+        // 3) Quantized predictions (QASYMM8 / QASYMM8_SIGNED): small integer noise + small boost
+        if (_data_type == DataType::QASYMM8 || _data_type == DataType::QASYMM8_SIGNED)
+        {
+            const bool is_signed = (_data_type == DataType::QASYMM8_SIGNED);
+
+            // Small integer noise
+            if (is_signed)
+            {
+                std::uniform_int_distribution<int32_t> noise(-6, 6);
+                library->fill(predictions, noise, i);
+            }
+            else
+            {
+                std::uniform_int_distribution<int32_t> noise(0, 12);
+                library->fill(predictions, noise, i);
+            }
+
+            // Small, variable boost in quantized units
+            std::uniform_int_distribution<int> boost_dist(0, 18);
+
+            for (unsigned int n = 0; n < N; ++n)
+            {
+                const uint32_t target = *reinterpret_cast<const uint32_t *>(targets(Coordinates{static_cast<int>(n)}));
+
+                const int         boost = boost_dist(gen);
+                const Coordinates pc{static_cast<int>(target), static_cast<int>(n)};
+
+                if (is_signed)
+                {
+                    auto     *p = reinterpret_cast<int8_t *>(predictions(pc));
+                    const int v = static_cast<int>(*p) + boost;
+                    *p          = static_cast<int8_t>(std::max(-128, std::min(127, v)));
+                }
+                else
+                {
+                    auto     *p = reinterpret_cast<uint8_t *>(predictions(pc));
+                    const int v = static_cast<int>(*p) + boost;
+                    *p          = static_cast<uint8_t>(std::max(0, std::min(255, v)));
+                }
+            }
+            return;
+        }
+
+        // 4) S32 predictions: integer noise + per-sample integer boost to target class
+        if (_data_type == DataType::S32)
+        {
+            // Small integer noise around 0
+            std::uniform_int_distribution<int32_t> noise(-20, 20);
+            library->fill(predictions, noise, i);
+
+            // Mixture of integer boosts so output is not always 1
+            std::uniform_real_distribution<float> u01(0.f, 1.f);
+            std::normal_distribution<float>       strong_boost(120.f, 25.f);
+            std::normal_distribution<float>       medium_boost(45.f, 20.f);
+            std::normal_distribution<float>       weak_boost(5.f, 10.f);
+
+            for (unsigned int n = 0; n < N; ++n)
+            {
+                const uint32_t target = *reinterpret_cast<const uint32_t *>(targets(Coordinates{static_cast<int>(n)}));
+
+                float       boost_f = 0.f;
+                const float r       = u01(gen);
+                if (r < 0.35f)
+                {
+                    boost_f = strong_boost(gen);
+                }
+                else if (r < 0.80f)
+                {
+                    boost_f = medium_boost(gen);
+                }
+                else
+                {
+                    boost_f = weak_boost(gen);
+                }
+
+                // Clamp boost and convert to integer units
+                boost_f = std::max(-50.f, std::min(boost_f, 200.f));
+                const int32_t boost = static_cast<int32_t>(std::lrint(boost_f));
+
+                const Coordinates pc{static_cast<int>(target), static_cast<int>(n)};
+
+                auto *p = reinterpret_cast<int32_t *>(predictions(pc));
+                // Saturating add (avoid UB on overflow)
+                const int64_t v64     = static_cast<int64_t>(*p) + static_cast<int64_t>(boost);
+                const int64_t clamped = std::max<int64_t>(std::numeric_limits<int32_t>::min(),
+                                                          std::min<int64_t>(std::numeric_limits<int32_t>::max(), v64));
+                *p = static_cast<int32_t>(clamped);
+            }
+            return;
+        }
+    }
+
+    TensorType compute_target(const TensorShape &pred_shape, const TensorShape &targets_shape, uint32_t k)
+    {
+        // Create tensors
+        TensorType pred    = create_tensor<TensorType>(pred_shape, _data_type, 1, QuantizationInfo());
+        TensorType targets = create_tensor<TensorType>(targets_shape, DataType::U32, 1, QuantizationInfo());
+        TensorType output;
+
+        // Create and configure function
+        FunctionType topkv_layer;
+        topkv_layer.configure(&pred, &targets, &output, k);
+
+        ARM_COMPUTE_ASSERT(pred.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(targets.info()->is_resizable());
+
+        // Allocate tensors
+        pred.allocator()->allocate();
+        targets.allocator()->allocate();
+        output.allocator()->allocate();
+
+        ARM_COMPUTE_ASSERT(!pred.info()->is_resizable());
+
+        // Fill tensors
+        const auto C{pred_shape[0]};
+        fill(AccessorType(pred), AccessorType(targets), 0, C);
+
+        topkv_layer.run();
+
+        return output;
+    }
+
+    SimpleTensor<uint8_t> compute_reference(const TensorShape &pred_shape, const TensorShape &targets_shape, uint32_t k)
+    {
+        // Create reference
+        SimpleTensor<T>        pred{pred_shape, _data_type, 1, QuantizationInfo()};
+        SimpleTensor<uint32_t> targets{targets_shape, DataType::U32, 1, QuantizationInfo()};
+
+        const auto C{pred_shape[0]};
+        fill(pred, targets, 0, C);
+
+        return reference::topkv(pred, targets, k);
+    }
+
+protected:
+    TensorType            _target{};
+    SimpleTensor<uint8_t> _reference{};
+    DataType              _data_type{};
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_TOPKVLAYERFIXTURE_H
diff --git a/tests/validation/reference/TopKV.cpp b/tests/validation/reference/TopKV.cpp
new file mode 100644
index 0000000000..d94ab714f0
--- /dev/null
+++ b/tests/validation/reference/TopKV.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2026 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "tests/validation/reference/TopKV.h"
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/TensorShape.h"
+
+#include "tests/SimpleTensor.h"
+
+#include <cstdint>
+#include <limits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+
+template <typename T>
+SimpleTensor<uint8_t> topkv(SimpleTensor<T> &predictions, SimpleTensor<uint32_t> &targets, uint32_t k)
+{
+    const TensorShape &ps = predictions.shape();
+    const int          C  = ps[0]; // classes
+    const int          N  = ps[1]; // batch
+
+    SimpleTensor<uint8_t> expected(TensorShape(N), DataType::U8);
+
+    const float eps = std::numeric_limits<float>::epsilon();
+
+    for (int i = 0; i < N; ++i)
+    {
+        // targets[i] (U32)
+        const uint32_t target_class = targets[i];
+
+        // Read predictions[target_class, i] as T, then promote to float
+        const T     target_t   = *reinterpret_cast<const T *>(predictions(Coordinates{target_class, i}));
+        const float target_val = static_cast<float>(target_t);
+
+        unsigned int rank = 0;
+        for (int c = 0; c < C; ++c)
+        {
+            const T     vt = *reinterpret_cast<const T *>(predictions(Coordinates{c, i}));
+            const float v  = static_cast<float>(vt);
+
+            if ((v - target_val) > eps)
+            {
+                ++rank;
+            }
+        }
+
+        expected[i] = static_cast<uint8_t>(rank < k);
+    }
+
+    return expected;
+}
+
+template SimpleTensor<uint8_t> topkv(SimpleTensor<float> &, SimpleTensor<uint32_t> &, uint32_t);
+template SimpleTensor<uint8_t> topkv(SimpleTensor<half> &, SimpleTensor<uint32_t> &, uint32_t);
+template SimpleTensor<uint8_t> topkv(SimpleTensor<uint8_t> &, SimpleTensor<uint32_t> &, uint32_t);
+template SimpleTensor<uint8_t> topkv(SimpleTensor<int8_t> &, SimpleTensor<uint32_t> &, uint32_t);
+template SimpleTensor<uint8_t> topkv(SimpleTensor<int32_t> &, SimpleTensor<uint32_t> &, uint32_t);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/TopKV.h b/tests/validation/reference/TopKV.h
new file mode 100644
index 0000000000..c90f748905
--- /dev/null
+++ b/tests/validation/reference/TopKV.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2026 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_TOPKV_H
+#define ACL_TESTS_VALIDATION_REFERENCE_TOPKV_H
+
+#include "tests/SimpleTensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<uint8_t> topkv(SimpleTensor<T> &predictions, SimpleTensor<uint32_t> &targets, uint32_t k);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_REFERENCE_TOPKV_H
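
Note for reviewers: below is a minimal standalone usage sketch of the new NETopKV function. It simply mirrors the configure -> allocate -> run flow used by TopKVValidationFixture::compute_target() above; the tensor shapes, the value of k, and the main() scaffolding are illustrative assumptions and are not part of this patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shapes: 10 classes x 4 samples of F32 scores, and 4 U32 target class ids.
    Tensor predictions, targets, output;
    predictions.allocator()->init(TensorInfo(TensorShape(10U, 4U), 1, DataType::F32));
    targets.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::U32));

    // Configure before allocating, as the validation fixture does; output is initialised by configure().
    NETopKV topkv;
    topkv.configure(&predictions, &targets, &output, 3 /* k, illustrative */);

    predictions.allocator()->allocate();
    targets.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill predictions and targets ...

    // After run(), output is a U8 tensor of batch size: element i is 1 when the score of
    // targets[i] ranks within the top k scores of sample i (cf. reference::topkv above).
    topkv.run();

    return 0;
}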