diff --git a/Android.bp b/Android.bp index 39ba145195..8a23f593dc 100644 --- a/Android.bp +++ b/Android.bp @@ -462,6 +462,7 @@ cc_library_static { "src/cpu/kernels/CpuScatterKernel.cpp", "src/cpu/kernels/CpuSoftmaxKernel.cpp", "src/cpu/kernels/CpuSubKernel.cpp", + "src/cpu/kernels/CpuTopKVKernel.cpp", "src/cpu/kernels/CpuTransposeKernel.cpp", "src/cpu/kernels/CpuWeightsReshapeKernel.cpp", "src/cpu/kernels/CpuWinogradConv2dKernel.cpp", @@ -611,6 +612,11 @@ cc_library_static { "src/cpu/kernels/sub/neon/qasymm8.cpp", "src/cpu/kernels/sub/neon/qasymm8_signed.cpp", "src/cpu/kernels/sub/neon/qsymm16.cpp", + "src/cpu/kernels/topkv/generic/neon/fp16.cpp", + "src/cpu/kernels/topkv/generic/neon/fp32.cpp", + "src/cpu/kernels/topkv/generic/neon/integer.cpp", + "src/cpu/kernels/topkv/generic/neon/qasymm8.cpp", + "src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp", "src/cpu/operators/CpuActivation.cpp", "src/cpu/operators/CpuAdd.cpp", "src/cpu/operators/CpuAddMulAdd.cpp", @@ -649,6 +655,7 @@ cc_library_static { "src/cpu/operators/CpuScatter.cpp", "src/cpu/operators/CpuSoftmax.cpp", "src/cpu/operators/CpuSub.cpp", + "src/cpu/operators/CpuTopKV.cpp", "src/cpu/operators/CpuTranspose.cpp", "src/cpu/operators/CpuWinogradConv2d.cpp", "src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp", @@ -978,6 +985,7 @@ cc_library_static { "src/runtime/NEON/functions/NEStackLayer.cpp", "src/runtime/NEON/functions/NEStridedSlice.cpp", "src/runtime/NEON/functions/NETile.cpp", + "src/runtime/NEON/functions/NETopKV.cpp", "src/runtime/NEON/functions/NETranspose.cpp", "src/runtime/NEON/functions/NEUnstack.cpp", "src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index 49a6e9fefb..82388da2d7 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2025 Arm Limited. + * Copyright (c) 2016-2026 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -112,6 +112,7 @@ #include "arm_compute/runtime/NEON/functions/NEStackLayer.h" #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/runtime/NEON/functions/NETile.h" +#include "arm_compute/runtime/NEON/functions/NETopKV.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/NEON/functions/NEUnstack.h" #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" diff --git a/arm_compute/runtime/NEON/functions/NETopKV.h b/arm_compute/runtime/NEON/functions/NETopKV.h new file mode 100644 index 0000000000..bdf9b10cd1 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NETopKV.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NETOPKV_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NETOPKV_H + +/** @file + * @publicapi + */ + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +// Forward declarations +class ITensor; +class ITensorInfo; + +/** Basic function to run cpu::kernels::CpuTopKVKernel + * + */ +class NETopKV : public IFunction +{ +public: + /** Constructor */ + NETopKV(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETopKV(const NETopKV &) = delete; + /** Default move constructor */ + NETopKV(NETopKV &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETopKV &operator=(const NETopKV &) = delete; + /** Default move assignment operator */ + NETopKV &operator=(NETopKV &&); + /** Destructor */ + ~NETopKV(); + /** Set the input and output of the kernel. + * + * @param[in] predictions A batch_size x classes tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED/S32 + * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: U32 + * @param[out] output Computed precision at @p k as a bool 1D tensor. Data types supported: U8 + * @param[in] k Number of top elements to look at for computing precision. + */ + void configure(const ITensor *predictions, const ITensor *targets, ITensor *output, const unsigned int k); + + /** Static function to check if given info will lead to a valid configuration. + * + * Similar to @ref NETopKV::configure() + * + * @return a status + */ + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; +}; +} // namespace arm_compute +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NETOPKV_H diff --git a/docs/user_guide/operator_list.dox b/docs/user_guide/operator_list.dox index c3a3ac59f4..0e4dc0ef64 100644 --- a/docs/user_guide/operator_list.dox +++ b/docs/user_guide/operator_list.dox @@ -3219,6 +3219,55 @@ where N = batches, C = channels, H = height, W = width, D = depth srcdst AllAll + + TopKV + Function to perform TopKV. + + + + NETopKV + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<tr><th>src0<th>src1<th>dst
<tr><td>QASYMM8<td>U32<td>U8
<tr><td>QASYMM8_SIGNED<td>U32<td>U8
<tr><td>S32<td>U32<td>U8
<tr><td>F16<td>U32<td>U8
<tr><td>F32<td>U32<td>U8
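The rows above list the data-type combinations supported by the new NEON function. As a quick illustration of how the public API documented in arm_compute/runtime/NEON/functions/NETopKV.h is expected to be wired up, here is a minimal usage sketch. It is not part of the patch: tensor names, shapes and the fill step are illustrative, and it assumes the [classes, batch] predictions layout described in the header.

    // Minimal usage sketch (not part of the patch) for the NETopKV function added above.
    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NETopKV.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void run_topkv_example()
    {
        constexpr unsigned int num_classes = 1000;
        constexpr unsigned int batch_size  = 32;
        constexpr unsigned int k           = 5;

        Tensor predictions, targets, hits;
        // predictions: [classes, batch]; F32 here, other types per the table above
        predictions.allocator()->init(TensorInfo(TensorShape(num_classes, batch_size), 1, DataType::F32));
        // targets: one U32 class id per batch element
        targets.allocator()->init(TensorInfo(TensorShape(batch_size), 1, DataType::U32));
        // hits: one U8 flag per batch element (1 if the target class is within the top-k scores)
        hits.allocator()->init(TensorInfo(TensorShape(batch_size), 1, DataType::U8));

        // Optionally validate the configuration before configuring the function
        ARM_COMPUTE_ERROR_THROW_ON(NETopKV::validate(predictions.info(), targets.info(), hits.info(), k));

        NETopKV topkv;
        topkv.configure(&predictions, &targets, &hits, k);

        predictions.allocator()->allocate();
        targets.allocator()->allocate();
        hits.allocator()->allocate();

        // ... fill predictions and targets ...

        topkv.run();
    }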
+ + Transpose Function to transpose a 2D tensor. diff --git a/filelist.json b/filelist.json index 9cf4b72eb1..1fea5a80e7 100644 --- a/filelist.json +++ b/filelist.json @@ -2453,6 +2453,27 @@ ] } }, + "TopKV": { + "files": { + "common": [ + "src/cpu/kernels/CpuTopKVKernel.cpp", + "src/cpu/operators/CpuTopKV.cpp", + "src/runtime/NEON/functions/NETopKV.cpp" + ], + "neon": { + "fp16": [ "src/cpu/kernels/topkv/generic/neon/fp16.cpp" ], + "fp32": [ "src/cpu/kernels/topkv/generic/neon/fp32.cpp" ], + "integer":["src/cpu/kernels/topkv/generic/neon/integer.cpp"], + "qasymm8": [ + "src/cpu/kernels/topkv/generic/neon/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp" + ] + } + + } + }, "Transpose": { "files": { "common": [ diff --git a/src/BUILD.bazel b/src/BUILD.bazel index 3c0b83369a..0c2a0cda99 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -738,6 +738,7 @@ filegroup( "cpu/kernels/CpuScatterKernel.cpp", "cpu/kernels/CpuSoftmaxKernel.cpp", "cpu/kernels/CpuSubKernel.cpp", + "cpu/kernels/CpuTopKVKernel.cpp", "cpu/kernels/CpuTransposeKernel.cpp", "cpu/kernels/CpuWeightsReshapeKernel.cpp", "cpu/kernels/CpuWinogradConv2dKernel.cpp", @@ -848,6 +849,10 @@ filegroup( "cpu/kernels/sub/neon/qasymm8.cpp", "cpu/kernels/sub/neon/qasymm8_signed.cpp", "cpu/kernels/sub/neon/qsymm16.cpp", + "cpu/kernels/topkv/generic/neon/fp32.cpp", + "cpu/kernels/topkv/generic/neon/integer.cpp", + "cpu/kernels/topkv/generic/neon/qasymm8.cpp", + "cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp", "cpu/operators/CpuActivation.cpp", "cpu/operators/CpuAdd.cpp", "cpu/operators/CpuAddMulAdd.cpp", @@ -886,6 +891,7 @@ filegroup( "cpu/operators/CpuScatter.cpp", "cpu/operators/CpuSoftmax.cpp", "cpu/operators/CpuSub.cpp", + "cpu/operators/CpuTopKV.cpp", "cpu/operators/CpuTranspose.cpp", "cpu/operators/CpuWinogradConv2d.cpp", "cpu/operators/internal/CpuGemmAssemblyDispatch.cpp", @@ -994,6 +1000,7 @@ filegroup( "runtime/NEON/functions/NEStackLayer.cpp", "runtime/NEON/functions/NEStridedSlice.cpp", "runtime/NEON/functions/NETile.cpp", + "runtime/NEON/functions/NETopKV.cpp", "runtime/NEON/functions/NETranspose.cpp", "runtime/NEON/functions/NEUnstack.cpp", "runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", @@ -1109,7 +1116,8 @@ filegroup( "cpu/kernels/scatter/generic/neon/fp16.cpp", "cpu/kernels/select/generic/neon/fp16.cpp", "cpu/kernels/softmax/generic/neon/fp16.cpp", - "cpu/kernels/sub/neon/fp16.cpp"] + + "cpu/kernels/sub/neon/fp16.cpp", + "cpu/kernels/topkv/generic/neon/fp16.cpp"] + glob(["**/*.h", "**/*.hpp", "**/*.inl"]), diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 74847f6878..bba1506afe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -732,6 +732,7 @@ target_sources( cpu/kernels/CpuScatterKernel.cpp cpu/kernels/CpuSoftmaxKernel.cpp cpu/kernels/CpuSubKernel.cpp + cpu/kernels/CpuTopKVKernel.cpp cpu/kernels/CpuTransposeKernel.cpp cpu/kernels/CpuWeightsReshapeKernel.cpp cpu/kernels/CpuWinogradConv2dKernel.cpp @@ -842,6 +843,10 @@ target_sources( cpu/kernels/sub/neon/qasymm8.cpp cpu/kernels/sub/neon/qasymm8_signed.cpp cpu/kernels/sub/neon/qsymm16.cpp + cpu/kernels/topkv/generic/neon/fp32.cpp + cpu/kernels/topkv/generic/neon/integer.cpp + cpu/kernels/topkv/generic/neon/qasymm8.cpp + cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp cpu/operators/CpuActivation.cpp cpu/operators/CpuAdd.cpp cpu/operators/CpuAddMulAdd.cpp @@ -880,6 +885,7 @@ target_sources( cpu/operators/CpuScatter.cpp cpu/operators/CpuSoftmax.cpp cpu/operators/CpuSub.cpp + 
cpu/operators/CpuTopKV.cpp cpu/operators/CpuTranspose.cpp cpu/operators/CpuWinogradConv2d.cpp cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -988,6 +994,7 @@ target_sources( runtime/NEON/functions/NEStackLayer.cpp runtime/NEON/functions/NEStridedSlice.cpp runtime/NEON/functions/NETile.cpp + runtime/NEON/functions/NETopKV.cpp runtime/NEON/functions/NETranspose.cpp runtime/NEON/functions/NEUnstack.cpp runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -1109,4 +1116,5 @@ target_sources( cpu/kernels/select/generic/neon/fp16.cpp cpu/kernels/softmax/generic/neon/fp16.cpp cpu/kernels/sub/neon/fp16.cpp + cpu/kernels/topkv/generic/neon/fp16.cpp ) \ No newline at end of file diff --git a/src/cpu/kernels/CpuTopKVKernel.cpp b/src/cpu/kernels/CpuTopKVKernel.cpp new file mode 100644 index 0000000000..e62f12d65c --- /dev/null +++ b/src/cpu/kernels/CpuTopKVKernel.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/cpu/kernels/CpuTopKVKernel.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" + +#include "src/common/utils/profile/acl_profile.h" +#include "src/core/common/Registrars.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/cpu/kernels/topkv/list.h" + +namespace arm_compute +{ +namespace cpu +{ + +namespace kernels +{ +namespace +{ + +static const std::vector available_kernels = { + {"neon_s32_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::S32); }, + REGISTER_FP32_NEON(arm_compute::cpu::topkv_s32_neon)}, + {"neon_fp32_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::F32); }, + REGISTER_FP32_NEON(arm_compute::cpu::topkv_fp32_neon)}, + {"neon_fp16_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::F16) && data.isa.fp16; }, + REGISTER_FP16_NEON(arm_compute::cpu::topkv_fp16_neon)}, + {"neon_qu8_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::QASYMM8); }, + REGISTER_QASYMM8_NEON(arm_compute::cpu::topkv_qasymm8_neon)}, + {"neon_qs8_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::QASYMM8_SIGNED); }, + REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::topkv_qasymm8_signed_neon)}}; + +Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, uint32_t k) +{ + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src0); + + // src0: predictions (logical shape [C, N], where N defaults to 1 if dimension 1 is absent) + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::S32, DataType::F16, DataType::F32); + + // src1: targets (class indices) + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src1, 1, DataType::U32); + + // predictions must at least have C + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src0.num_dimensions() < 1, "predictions must have at least 1 dimension (C)"); + + const unsigned int C = src0.tensor_shape()[0]; // classes + const unsigned int N = src0.tensor_shape()[1]; // batch (defaults to 1 if not present) + + // k constraints + ARM_COMPUTE_RETURN_ERROR_ON_MSG(k == 0, "k must be > 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(C == 0, "predictions classes dimension must be > 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(N == 0, "predictions batch dimension must be > 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(k > C, "k must be <= number of classes (C)"); + + // targets must match batch + // targets is expected to contain N elements (shape [N]) + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src1.num_dimensions() < 1, "targets must have at least 1 dimension"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src1.tensor_shape()[0] != N, + "targets dimension must match predictions batch dimension (N)"); + + // Output is one byte per batch element: shape [N] + const TensorShape out_shape(N); + + // If dst is already configured, validate it + if (dst.total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&dst, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst.tensor_shape() != out_shape, "dst shape must be [N]"); + } + + const auto uk = CpuTopKVKernel::get_implementation( + CpuTopKVKernelDataTypeISASelectorData{src0.data_type(), CPUInfo::get().get_isa()}); + + ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || 
uk->ukernel == nullptr); + + return Status{}; +} + +} // namespace + +void CpuTopKVKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, uint32_t k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKVKernel::configure"); + ARM_COMPUTE_UNUSED(src1); // compiler error on v7a + ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, k)); + + const auto uk = CpuTopKVKernel::get_implementation( + CpuTopKVKernelDataTypeISASelectorData{src0->data_type(), CPUInfo::get().get_isa()}); + + ARM_COMPUTE_ERROR_ON_NULLPTR(uk); + + _run_method = uk->ukernel; + _name = std::string("CpuTopKVKernel").append("/").append(uk->name); + _k = k; + // Auto initialize dst if not initialized + auto_init_if_empty(*dst, TensorShape(src0->dimension(1)), 1U, DataType::U8); + // Configure kernel window + Window win = calculate_max_window(*dst, Steps()); + ICpuKernel::configure(win); +} +size_t CpuTopKVKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(thread_count); + ARM_COMPUTE_UNUSED(platform); + + return 1024u; +} + +Status CpuTopKVKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, uint32_t k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKVKernel::validate"); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst); + + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst, k)); + + return Status{}; +} + +void CpuTopKVKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKVKernel::run_op"); + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); + + ARM_COMPUTE_ERROR_ON(tensors.empty()); + ARM_COMPUTE_ERROR_ON(_run_method == nullptr); + + const ITensor *predictions = tensors.get_const_tensor(TensorType::ACL_SRC_0); + const ITensor *targets = tensors.get_const_tensor(TensorType::ACL_SRC_1); + ITensor *output = tensors.get_tensor(TensorType::ACL_DST); + _run_method(predictions, targets, output, _k, window); +} + +const char *CpuTopKVKernel::name() const +{ + return _name.c_str(); +} + +const std::vector &CpuTopKVKernel::get_available_kernels() +{ + return available_kernels; +} + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/CpuTopKVKernel.h b/src/cpu/kernels/CpuTopKVKernel.h new file mode 100644 index 0000000000..c2085c2365 --- /dev/null +++ b/src/cpu/kernels/CpuTopKVKernel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_CPUTOPKVKERNEL_H +#define ACL_SRC_CPU_KERNELS_CPUTOPKVKERNEL_H + +#include "arm_compute/core/CPP/CPPTypes.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Window.h" + +#include "src/core/common/Macros.h" +#include "src/cpu/ICpuKernel.h" +#include "src/cpu/kernels/CpuKernelSelectionTypes.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ +using CpuTopKVKernelDataTypeISASelectorData = DataTypeISASelectorData; +using CpuTopKVKernelDataTypeISASelectorDataPtr = DataTypeISASelectorPtr; + +/** Interface for the kernel to perform addition between two tensors */ +class CpuTopKVKernel : public ICpuKernel +{ +private: + using TopKVKernelPtr = + std::add_pointer::type; + +public: + struct TopKVKernel + { + const char *name; + const CpuTopKVKernelDataTypeISASelectorDataPtr is_selected; + TopKVKernelPtr ukernel; + }; + + CpuTopKVKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuTopKVKernel); + /** Initialise the kernel's input, dst and border mode. + * + * @param[in] src0 First input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32 + * @param[in] src1 Second input tensor info. Data types supported: U32 + * @param[out] dst The dst tensor info. Data types supported: U8 + */ + void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, uint32_t k); + /** Static function to check if given info will lead to a valid configuration + * + * Similar to CpuTopKVKernel::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, uint32_t k); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + const char *name() const override; + + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return[out] small_network_mws Minimum workload size for requsted configuration. + */ + size_t get_mws(const CPUInfo &platform, size_t thread_count) const override; + + static const std::vector &get_available_kernels(); + +private: + uint32_t _k{}; + TopKVKernelPtr _run_method{nullptr}; + std::string _name{}; +}; +} // namespace kernels +} // namespace cpu +} // namespace arm_compute +#endif // ACL_SRC_CPU_KERNELS_CPUTOPKVKERNEL_H diff --git a/src/cpu/kernels/topkv/generic/neon/fp16.cpp b/src/cpu/kernels/topkv/generic/neon/fp16.cpp new file mode 100644 index 0000000000..cf0dc460d3 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/fp16.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) + +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u32x4(uint32x4_t v) +{ +#if defined(__aarch64__) + return vaddvq_u32(v); +#else + uint32x2_t s = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + s = vpadd_u32(s, s); + return vget_lane_u32(s, 0); +#endif +} + +// Explicit specialization for float16_t +template <> +uint32_t count_gt_block(const float16_t *ptr, float16_t threshold) +{ + // Load 8 fp16 + const float16x8_t v16 = vld1q_f16(reinterpret_cast(ptr)); + + // Compare in fp32 (same correctness story you already debugged) + const float32x4_t thr = vdupq_n_f32(threshold); + + const float32x4_t v0 = vcvt_f32_f16(vget_low_f16(v16)); + const float32x4_t v1 = vcvt_f32_f16(vget_high_f16(v16)); + + const uint32x4_t b0 = vshrq_n_u32(vcgtq_f32(v0, thr), 31); + const uint32x4_t b1 = vshrq_n_u32(vcgtq_f32(v1, thr), 31); + + return reduce_u32x4(b0) + reduce_u32x4(b1); +} + +} // namespace detail + +void topkv_fp16_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +template void +detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute + +#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/topkv/generic/neon/fp32.cpp b/src/cpu/kernels/topkv/generic/neon/fp32.cpp new file mode 100644 index 0000000000..ce946ac8a0 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/fp32.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/core/NEON/wrapper/wrapper.h" +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u32x4(uint32x4_t v) +{ +#if defined(__aarch64__) + return vaddvq_u32(v); +#else + uint32x2_t s = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + s = vpadd_u32(s, s); + return vget_lane_u32(s, 0); +#endif +} + +// Explicit specialization for float: may use float32x4_t etc (only in this TU) +template <> +uint32_t count_gt_block(const float *ptr, float threshold) +{ + using Tag = wrapper::traits::neon_bitvector_tag_t; + + const auto thr_vec = wrapper::vdup_n(threshold, Tag{}); + const auto v = wrapper::vloadq(ptr); + const auto mask = wrapper::vcgt(v, thr_vec); // underlying uint32x4_t + + const uint32x4_t m = mask; + const uint32x4_t b = vshrq_n_u32(m, 31); + return reduce_u32x4(b); +} + +} // namespace detail + +void topkv_fp32_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +// Force codegen of the template into this TU (optional but recommended to keep things tidy) +template void detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/generic/neon/impl.h b/src/cpu/kernels/topkv/generic/neon/impl.h new file mode 100644 index 0000000000..d4abca3605 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/impl.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_TOPKV_GENERIC_NEON_IMPL_H +#define ACL_SRC_CPU_KERNELS_TOPKV_GENERIC_NEON_IMPL_H + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Window.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +template +uint32_t count_gt_block(const ScalarType *ptr, ScalarType threshold); + +template +void topkv_neon_wrapper( + const ITensor *predictions, const ITensor *targets, ITensor *out, uint32_t k, const Window &window) +{ + const auto &pred_info = *predictions->info(); + const unsigned int C = pred_info.tensor_shape()[0]; + + ARM_COMPUTE_ERROR_ON(pred_info.strides_in_bytes()[0] != sizeof(ScalarType)); + + constexpr unsigned int vec_elems = 16 / sizeof(ScalarType); + + Window win = window; + + Iterator tgt_it(targets, win); + Iterator out_it(out, win); + + execute_window_loop( + win, + [&](const Coordinates &id) + { + const int n = id.x(); + const uint32_t t = *reinterpret_cast(tgt_it.ptr()); + + const ScalarType *base = + reinterpret_cast(predictions->ptr_to_element(Coordinates{0, n})); + + // integer/quant: compare against target value (+1 for strict > if you want eps-like behavior) + const ScalarType thr = base[t]; + + uint32_t rank = 0; + unsigned int c = 0; + + for (; c + vec_elems <= C; c += vec_elems) + { + rank += count_gt_block(base + c, thr); + } + + for (; c < C; ++c) + { + rank += (base[c] > thr) ? 1u : 0u; + } + + *reinterpret_cast(out_it.ptr()) = static_cast(rank < k); + }, + tgt_it, out_it); +} +} // namespace detail +} // namespace cpu +} // namespace arm_compute + +#endif // ACL_SRC_CPU_KERNELS_TOPKV_GENERIC_NEON_IMPL_H diff --git a/src/cpu/kernels/topkv/generic/neon/integer.cpp b/src/cpu/kernels/topkv/generic/neon/integer.cpp new file mode 100644 index 0000000000..b8dc523ab4 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/integer.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u32x4(uint32x4_t v) +{ +#if defined(__aarch64__) + return vaddvq_u32(v); +#else + uint32x2_t s = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + s = vpadd_u32(s, s); + return vget_lane_u32(s, 0); +#endif +} + +template <> +uint32_t count_gt_block(const int32_t *ptr, int32_t threshold) +{ + const int32x4_t v = vld1q_s32(ptr); + const int32x4_t thr = vdupq_n_s32(threshold); + const uint32x4_t m = vcgtq_s32(v, thr); // 0xFFFFFFFF / 0 per lane + const uint32x4_t b = vshrq_n_u32(m, 31); // 0/1 per lane + return reduce_u32x4(b); +} + +} // namespace detail + +void topkv_s32_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +template void +detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/generic/neon/qasymm8.cpp b/src/cpu/kernels/topkv/generic/neon/qasymm8.cpp new file mode 100644 index 0000000000..f777f991bc --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/qasymm8.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u8_to_count(uint8x16_t m) +{ +#if defined(__aarch64__) + // mask is 0xFF where true, 0 otherwise -> shift to 0/1 then sum + const uint8x16_t ones = vshrq_n_u8(m, 7); + return vaddvq_u8(ones); +#else + const uint8x16_t ones = vshrq_n_u8(m, 7); + uint16x8_t s16 = vpaddlq_u8(ones); + uint32x4_t s32 = vpaddlq_u16(s16); + uint64x2_t s64 = vpaddlq_u32(s32); + return static_cast(vgetq_lane_u64(s64, 0) + vgetq_lane_u64(s64, 1)); +#endif +} + +template <> +uint32_t count_gt_block(const uint8_t *ptr, uint8_t threshold) +{ + const uint8x16_t v = vld1q_u8(ptr); + const uint8x16_t thr = vdupq_n_u8(threshold); + const uint8x16_t m = vcgtq_u8(v, thr); // 0xFF / 0x00 bytes + return reduce_u8_to_count(m); +} + +} // namespace detail + +void topkv_qasymm8_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +template void +detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp b/src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp new file mode 100644 index 0000000000..0970fa4add --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/cpu/kernels/topkv/generic/neon/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ +static inline uint32_t reduce_u8_to_count(uint8x16_t m) +{ +#if defined(__aarch64__) + const uint8x16_t ones = vshrq_n_u8(m, 7); + return vaddvq_u8(ones); +#else + const uint8x16_t ones = vshrq_n_u8(m, 7); + uint16x8_t s16 = vpaddlq_u8(ones); + uint32x4_t s32 = vpaddlq_u16(s16); + uint64x2_t s64 = vpaddlq_u32(s32); + return static_cast(vgetq_lane_u64(s64, 0) + vgetq_lane_u64(s64, 1)); +#endif +} + +template <> +uint32_t count_gt_block(const int8_t *ptr, int8_t threshold) +{ + const int8x16_t v = vld1q_s8(ptr); + const int8x16_t thr = vdupq_n_s8(threshold); + const uint8x16_t m = vcgtq_s8(v, thr); // returns uint8x16_t mask + return reduce_u8_to_count(m); +} + +} // namespace detail + +void topkv_qasymm8_signed_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_neon_wrapper(in1, in2, out, k, win); +} + +template void detail::topkv_neon_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/list.h b/src/cpu/kernels/topkv/list.h new file mode 100644 index 0000000000..3719c5eb42 --- /dev/null +++ b/src/cpu/kernels/topkv/list.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_TOPKV_LIST_H +#define ACL_SRC_CPU_KERNELS_TOPKV_LIST_H + +namespace arm_compute +{ +namespace cpu +{ +#define DECLARE_TOPKV_KERNEL(func_name) \ + void func_name(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) + +DECLARE_TOPKV_KERNEL(topkv_qasymm8_neon); +DECLARE_TOPKV_KERNEL(topkv_qasymm8_signed_neon); +DECLARE_TOPKV_KERNEL(topkv_fp16_neon); +DECLARE_TOPKV_KERNEL(topkv_fp32_neon); +DECLARE_TOPKV_KERNEL(topkv_s32_neon); + +#undef DECLARE_TOPKV_KERNEL +} // namespace cpu +} // namespace arm_compute + +#endif // ACL_SRC_CPU_KERNELS_TOPKV_LIST_H diff --git a/src/cpu/operators/CpuTopKV.cpp b/src/cpu/operators/CpuTopKV.cpp new file mode 100644 index 0000000000..46a5f97471 --- /dev/null +++ b/src/cpu/operators/CpuTopKV.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/cpu/operators/CpuTopKV.h" + +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include "src/common/IOperator.h" +#include "src/common/utils/Log.h" +#include "src/common/utils/profile/acl_profile.h" +#include "src/cpu/kernels/CpuTopKVKernel.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +void CpuTopKV::configure(const ITensorInfo *predictions, + const ITensorInfo *targets, + ITensorInfo *output, + const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKV::configure"); + ARM_COMPUTE_LOG_PARAMS(predictions, targets, output, k); + + auto kernel = std::make_unique(); + kernel->configure(predictions, targets, output, k); + _kernel = std::move(kernel); +} + +Status CpuTopKV::validate(const ITensorInfo *predictions, + const ITensorInfo *targets, + ITensorInfo *output, + const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKV::validate"); + return kernels::CpuTopKVKernel::validate(predictions, targets, output, k); +} + +void CpuTopKV::run(ITensorPack &tensors) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKV::run"); + ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); + + NEScheduler::get().schedule_op(_kernel.get(), Window::DimX, _kernel->window(), tensors); +} +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/operators/CpuTopKV.h b/src/cpu/operators/CpuTopKV.h new file mode 100644 index 0000000000..b0a81a028b --- /dev/null +++ b/src/cpu/operators/CpuTopKV.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_OPERATORS_CPUTOPKV_H +#define ACL_SRC_CPU_OPERATORS_CPUTOPKV_H + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" + +#include "src/cpu/ICpuOperator.h" + +namespace arm_compute +{ +namespace cpu +{ +/** Basic function to run @ref kernels::CpuTopKVKernel */ +class CpuTopKV : public ICpuOperator +{ +public: + /** Set the input and output of the kernel. + * + * @param[in] predictions A classes x batches tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED/S32 + * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: U32 + * @param[out] output Computed precision at @p k as a bool 1D tensor. Data types supported: U8 + * @param[in] k Number of top elements to look at for computing precision. + */ + void + configure(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + + /** Static function to check if given info will lead to a valid configuration. + * + * Similar to @ref CpuTopKV::configure() + * + * @return a status + */ + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; +}; +} // namespace cpu +} // namespace arm_compute +#endif // ACL_SRC_CPU_OPERATORS_CPUTOPKV_H diff --git a/src/runtime/NEON/functions/NETopKV.cpp b/src/runtime/NEON/functions/NETopKV.cpp new file mode 100644 index 0000000000..3e942591dc --- /dev/null +++ b/src/runtime/NEON/functions/NETopKV.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NETopKV.h" + +#include "arm_compute/core/Validate.h" + +#include "src/common/utils/Log.h" +#include "src/common/utils/profile/acl_profile.h" +#include "src/cpu/operators/CpuTopKV.h" + +namespace arm_compute +{ +struct NETopKV::Impl +{ + const ITensor *predictions{nullptr}; + const ITensor *targets{nullptr}; + ITensor *output{nullptr}; + std::unique_ptr op{nullptr}; +}; + +NETopKV::NETopKV() : _impl(std::make_unique()) +{ +} +NETopKV::NETopKV(NETopKV &&) = default; +NETopKV &NETopKV::operator=(NETopKV &&) = default; +NETopKV::~NETopKV() = default; + +void NETopKV::configure(const ITensor *predictions, const ITensor *targets, ITensor *output, const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NETopKV::configure"); + ARM_COMPUTE_LOG_PARAMS(predictions, targets, output, k); + + _impl->predictions = predictions; + _impl->targets = targets; + _impl->output = output; + + ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->predictions, _impl->targets, _impl->output); + + _impl->op = std::make_unique(); + _impl->op->configure(_impl->predictions->info(), _impl->targets->info(), _impl->output->info(), k); +} + +Status +NETopKV::validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NETopKV::validate"); + ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(predictions, targets, output); + return cpu::CpuTopKV::validate(predictions, targets, output, k); +} + +void NETopKV::run() +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NETopKV::run"); + ARM_COMPUTE_LOG_PARAMS(_impl->predictions, _impl->targets, _impl->output, k); + + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->predictions); + pack.add_tensor(TensorType::ACL_SRC_1, _impl->targets); + pack.add_tensor(TensorType::ACL_DST, _impl->output); + _impl->op->run(pack); +} +} // namespace arm_compute diff --git a/tests/datasets/TopKVDataset.h b/tests/datasets/TopKVDataset.h new file mode 100644 index 0000000000..e96006ef80 --- /dev/null +++ b/tests/datasets/TopKVDataset.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_DATASETS_TOPKVDATASET_H +#define ACL_TESTS_DATASETS_TOPKVDATASET_H + +#include "arm_compute/core/TensorShape.h" + +#include "tests/framework/datasets/Datasets.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +/** Parent type for all for shape datasets. */ +using ShapeDataset = framework::dataset::ContainerDataset>; + +/** Data set containing small edge cases for TopKV operator. */ +class Small1DTopKV final : public ShapeDataset +{ +public: + Small1DTopKV() : ShapeDataset("Shape", {TensorShape{8U, 1U}, TensorShape{1U, 8U}}) + { + } +}; + +/** Data set containing small 2D tensor shapes for TopKV operator. */ +class SmallTopKV final : public ShapeDataset +{ +public: + SmallTopKV() + : ShapeDataset("Shape", + {TensorShape{8U, 1U}, TensorShape{8U, 7U}, TensorShape{15U, 13U}, TensorShape{32U, 64U}}) + { + } +}; + +/** Data set containing small 2D tensor shapes for TopKV operator. */ +class LargeTopKV final : public ShapeDataset +{ +public: + LargeTopKV() + : ShapeDataset("Shape", {TensorShape{1000U, 64U}, TensorShape{1500U, 128U}, TensorShape{1000U, 32000U}}) + { + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_DATASETS_TOPKVDATASET_H diff --git a/tests/validation/NEON/TopKV.cpp b/tests/validation/NEON/TopKV.cpp new file mode 100644 index 0000000000..e7887c7f2a --- /dev/null +++ b/tests/validation/NEON/TopKV.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/utils/misc/Traits.h" +#include "arm_compute/runtime/CPP/functions/CPPTopKV.h" +#include "arm_compute/runtime/NEON/functions/NETopKV.h" +#include "arm_compute/runtime/Tensor.h" + +#include "tests/datasets/TopKVDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/validation/fixtures/TopKVLayerFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; + +const auto tiny_dataset_topkv = combine(datasets::Small1DTopKV(), make("K", 1)); +const auto small_dataset_topkv = combine(datasets::SmallTopKV(), make("K", 3, 5)); +const auto large_dataset_topkv = combine(datasets::LargeTopKV(), make("K", 3, 5)); +const auto s32_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::S32)); +const auto s32_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::S32)); +const auto s32_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::S32)); +const auto f32_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::F32)); +const auto f32_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::F32)); +const auto f32_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::F32)); +const auto f16_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::F16)); +const auto f16_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::F16)); +const auto f16_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::F16)); +const auto qu8_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qu8_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qu8_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qs8_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::QASYMM8_SIGNED)); +const auto qs8_tiny_dataset = combine(tiny_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qs8_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::QASYMM8_SIGNED)); + +constexpr AbsoluteTolerance ZeroTolerance{0}; + +TEST_SUITE(NEON) +TEST_SUITE(TopKVLayer) +// clang-format on +// *INDENT-ON* +template +using NETopKVFixture = TopKVValidationFixture; + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, qu8_tiny_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, qu8_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, qu8_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, qs8_tiny_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, qs8_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, 
framework::DatasetMode::NIGHTLY, qs8_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + +TEST_SUITE(Float) +#ifdef ARM_COMPUTE_ENABLE_FP16 +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, f16_tiny_dataset) +{ + if (CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); + } + else + { + ARM_COMPUTE_TEST_WARNING("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_WARNING(); + } +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, f16_small_dataset) +{ + if (CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); + } + else + { + ARM_COMPUTE_TEST_WARNING("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_WARNING(); + } +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, f16_large_dataset) +{ + if (CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); + } + else + { + ARM_COMPUTE_TEST_WARNING("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_WARNING(); + } +} +TEST_SUITE_END() // FP16 +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, f32_tiny_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, f32_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, f32_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunTiny, NETopKVFixture, framework::DatasetMode::ALL, s32_tiny_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, s32_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, s32_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // S32 + +TEST_SUITE_END() // TopKVLayer +TEST_SUITE_END() // NEON + +TEST_SUITE(CPP) +TEST_SUITE(TopKVLayer) + +template +using CPPTopKVLayerFixture = TopKVValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +// Used only for benchmarking +FIXTURE_DATA_TEST_CASE(RunLarge, CPPTopKVLayerFixture, framework::DatasetMode::DISABLED, f32_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, ZeroTolerance); +} +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // TopKVLayer +TEST_SUITE_END() // NEON + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/TopKVLayerFixture.h b/tests/validation/fixtures/TopKVLayerFixture.h new file mode 100644 index 0000000000..1be7c38e1f --- /dev/null +++ b/tests/validation/fixtures/TopKVLayerFixture.h @@ -0,0 +1,265 @@ +/* + * 
+ * Copyright (c) 2026 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_TOPKVLAYERFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_TOPKVLAYERFIXTURE_H
+
+#include "tests/AssetsLibrary.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/Globals.h"
+#include "tests/validation/reference/TopKV.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <random>
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class TopKVValidationFixture : public framework::Fixture
+{
+public:
+    void setup(TensorShape predictions_shape, uint32_t k, DataType input_data_type)
+    {
+        if (std::is_same<TensorType, Tensor>::value && // Cpu
+            input_data_type == DataType::F16 && !CPUInfo::get().has_fp16())
+        {
+            return;
+        }
+        const TensorShape targets_shape(predictions_shape[1]);
+        _data_type = input_data_type;
+        _target    = compute_target(predictions_shape, targets_shape, k);
+        _reference = compute_reference(predictions_shape, targets_shape, k);
+    }
+
+protected:
+    // Fills prediction scores with small random noise, assigns a random target class to each sample,
+    // and slightly boosts the target score so some targets fall inside the top-K and others do not.
+    template <typename U, typename W>
+    void fill(U &&predictions, W &&targets, int32_t i, uint32_t C)
+    {
+        std::mt19937 gen(0);
+
+        // 1) Fill targets with valid class indices in [0, C-1]
+        std::uniform_int_distribution<uint32_t> class_dist(0, C - 1);
+        library->fill(targets, class_dist, i);
+
+        const unsigned int N = targets.shape()[0];
+
+        // 2) Float predictions (F32/F16): small noise + per-sample boost to target class
+        if (_data_type == DataType::F32 || _data_type == DataType::F16)
+        {
+            std::normal_distribution<float> noise(0.f, 0.1f);
+            library->fill(predictions, noise, i);
+
+            // Mixture of boosts so output is not always 1
+            std::uniform_real_distribution<float> u01(0.f, 1.f);
+            std::normal_distribution<float>       strong_boost(0.8f, 0.15f);
+            std::normal_distribution<float>       medium_boost(0.35f, 0.15f);
+            std::normal_distribution<float>       weak_boost(0.05f, 0.10f);
+
+            for (unsigned int n = 0; n < N; ++n)
+            {
+                const uint32_t target = *reinterpret_cast<const uint32_t *>(targets(Coordinates{static_cast<int>(n)}));
+
+                float       boost = 0.f;
+                const float r     = u01(gen);
+                if (r < 0.35f)
+                {
+                    boost = strong_boost(gen);
+                }
+                else if (r < 0.80f)
+                {
+                    boost = medium_boost(gen);
+                }
+                else
+                {
+                    boost = weak_boost(gen);
+                }
+                boost = std::max(-0.2f, std::min(boost, 1.5f));
+
+                const Coordinates pc{static_cast<int>(target), static_cast<int>(n)};
+
+                if (_data_type == DataType::F32)
+                {
+                    *reinterpret_cast<float *>(predictions(pc)) += boost;
+                }
+                else // F16 (half)
+                {
+                    auto *p = reinterpret_cast<half *>(predictions(pc));
+                    *p      = static_cast<half>(static_cast<float>(*p) + boost);
+                }
+            }
+            return;
+        }
+
+        // 3) Quantized predictions (QASYMM8 / QASYMM8_SIGNED): small integer noise + small boost
+        if (_data_type == DataType::QASYMM8 || _data_type == DataType::QASYMM8_SIGNED)
+        {
+            const bool is_signed = (_data_type == DataType::QASYMM8_SIGNED);
+
+            // Small integer noise
+            if (is_signed)
+            {
+                std::uniform_int_distribution<int32_t> noise(-6, 6);
+                library->fill(predictions, noise, i);
+            }
+            else
+            {
+                std::uniform_int_distribution<int32_t> noise(0, 12);
+                library->fill(predictions, noise, i);
+            }
+
+            // Small, variable boost in quantized units
+            std::uniform_int_distribution<int> boost_dist(0, 18);
+
+            for (unsigned int n = 0; n < N; ++n)
+            {
+                const uint32_t target = *reinterpret_cast<const uint32_t *>(targets(Coordinates{static_cast<int>(n)}));
+
+                const int         boost = boost_dist(gen);
+                const Coordinates pc{static_cast<int>(target), static_cast<int>(n)};
+
+                if (is_signed)
+                {
+                    auto     *p = reinterpret_cast<int8_t *>(predictions(pc));
+                    const int v = static_cast<int>(*p) + boost;
+                    *p          = static_cast<int8_t>(std::max(-128, std::min(127, v)));
+                }
+                else
+                {
+                    auto     *p = reinterpret_cast<uint8_t *>(predictions(pc));
+                    const int v = static_cast<int>(*p) + boost;
+                    *p          = static_cast<uint8_t>(std::max(0, std::min(255, v)));
+                }
+            }
+            return;
+        }
+
+        // 4) S32 predictions: integer noise + per-sample integer boost to target class
+        if (_data_type == DataType::S32)
+        {
+            // Small integer noise around 0
+            std::uniform_int_distribution<int32_t> noise(-20, 20);
+            library->fill(predictions, noise, i);
+
+            // Mixture of integer boosts so output is not always 1
+            std::uniform_real_distribution<float> u01(0.f, 1.f);
+            std::normal_distribution<float>       strong_boost(120.f, 25.f);
+            std::normal_distribution<float>       medium_boost(45.f, 20.f);
+            std::normal_distribution<float>       weak_boost(5.f, 10.f);
+
+            for (unsigned int n = 0; n < N; ++n)
+            {
+                const uint32_t target = *reinterpret_cast<const uint32_t *>(targets(Coordinates{static_cast<int>(n)}));
+
+                float       boost_f = 0.f;
+                const float r       = u01(gen);
+                if (r < 0.35f)
+                {
+                    boost_f = strong_boost(gen);
+                }
+                else if (r < 0.80f)
+                {
+                    boost_f = medium_boost(gen);
+                }
+                else
+                {
+                    boost_f = weak_boost(gen);
+                }
+
+                // Clamp boost and convert to integer units
+                boost_f = std::max(-50.f, std::min(boost_f, 200.f));
+                const int32_t boost = static_cast<int32_t>(std::lrint(boost_f));
+
+                const Coordinates pc{static_cast<int>(target), static_cast<int>(n)};
+
+                auto *p = reinterpret_cast<int32_t *>(predictions(pc));
+                // Saturating add (avoid UB on overflow)
+                const int64_t v64     = static_cast<int64_t>(*p) + static_cast<int64_t>(boost);
+                const int64_t clamped = std::max<int64_t>(std::numeric_limits<int32_t>::min(),
+                                                          std::min<int64_t>(std::numeric_limits<int32_t>::max(), v64));
+                *p = static_cast<int32_t>(clamped);
+            }
+            return;
+        }
+    }
+
+    TensorType compute_target(const TensorShape &pred_shape, const TensorShape &targets_shape, uint32_t k)
+    {
+        // Create tensors
+        TensorType pred    = create_tensor<TensorType>(pred_shape, _data_type, 1, QuantizationInfo());
+        TensorType targets = create_tensor<TensorType>(targets_shape, DataType::U32, 1, QuantizationInfo());
+        TensorType output;
+
+        // Create and configure function
+        FunctionType topkv_layer;
+        topkv_layer.configure(&pred, &targets, &output, k);
+
+        ARM_COMPUTE_ASSERT(pred.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(targets.info()->is_resizable());
+
+        // Allocate tensors
+        pred.allocator()->allocate();
+        targets.allocator()->allocate();
+        output.allocator()->allocate();
+
+        ARM_COMPUTE_ASSERT(!pred.info()->is_resizable());
+
+        // Fill tensors
+        const auto C{pred_shape[0]};
+        fill(AccessorType(pred), AccessorType(targets), 0, C);
+
+        topkv_layer.run();
+
+        return output;
+    }
+
+    SimpleTensor<uint8_t> compute_reference(const TensorShape &pred_shape, const TensorShape &targets_shape, uint32_t k)
+    {
+        // Create reference
+        SimpleTensor<T>        pred{pred_shape, _data_type, 1, QuantizationInfo()};
+        SimpleTensor<uint32_t> targets{targets_shape, DataType::U32, 1, QuantizationInfo()};
+
+        const auto C{pred_shape[0]};
+        fill(pred, targets, 0, C);
+
+        return reference::topkv(pred, targets, k);
+    }
+
+protected:
+    TensorType            _target{};
+    SimpleTensor<uint8_t> _reference{};
+    DataType              _data_type{};
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_TOPKVLAYERFIXTURE_H
diff --git a/tests/validation/reference/TopKV.cpp b/tests/validation/reference/TopKV.cpp
new file mode 100644
index 0000000000..d94ab714f0
--- /dev/null
+++ b/tests/validation/reference/TopKV.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2026 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "tests/validation/reference/TopKV.h"
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/TensorShape.h"
+
+#include "tests/SimpleTensor.h"
+
+#include <cstdint>
+#include <limits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+
+template <typename T>
+SimpleTensor<uint8_t> topkv(SimpleTensor<T> &predictions, SimpleTensor<uint32_t> &targets, uint32_t k)
+{
+    const TensorShape &ps = predictions.shape();
+    const int          C  = ps[0]; // classes
+    const int          N  = ps[1]; // batch
+
+    SimpleTensor<uint8_t> expected(TensorShape(N), DataType::U8);
+
+    const float eps = std::numeric_limits<float>::epsilon();
+
+    for (int i = 0; i < N; ++i)
+    {
+        // targets[i] (U32)
+        const uint32_t target_class = targets[i];
+
+        // Read predictions[target_class, i] as T, then promote to float
+        const T     target_t   = *reinterpret_cast<const T *>(predictions(Coordinates{target_class, i}));
+        const float target_val = static_cast<float>(target_t);
+
+        unsigned int rank = 0;
+        for (int c = 0; c < C; ++c)
+        {
+            const T     vt = *reinterpret_cast<const T *>(predictions(Coordinates{c, i}));
+            const float v  = static_cast<float>(vt);
+
+            if ((v - target_val) > eps)
+            {
+                ++rank;
+            }
+        }
+
+        expected[i] = static_cast<uint8_t>(rank < k);
+    }
+
+    return expected;
+}
+
+template SimpleTensor<uint8_t> topkv(SimpleTensor<float> &, SimpleTensor<uint32_t> &, uint32_t);
+template SimpleTensor<uint8_t> topkv(SimpleTensor<half> &, SimpleTensor<uint32_t> &, uint32_t);
+template SimpleTensor<uint8_t> topkv(SimpleTensor<uint8_t> &, SimpleTensor<uint32_t> &, uint32_t);
+template SimpleTensor<uint8_t> topkv(SimpleTensor<int8_t> &, SimpleTensor<uint32_t> &, uint32_t);
+template SimpleTensor<uint8_t> topkv(SimpleTensor<int32_t> &, SimpleTensor<uint32_t> &, uint32_t);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/TopKV.h b/tests/validation/reference/TopKV.h
new file mode 100644
index 0000000000..c90f748905
--- /dev/null
+++ b/tests/validation/reference/TopKV.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2026 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_TOPKV_H
+#define ACL_TESTS_VALIDATION_REFERENCE_TOPKV_H
+
+#include "tests/SimpleTensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<uint8_t> topkv(SimpleTensor<T> &predictions, SimpleTensor<uint32_t> &targets, uint32_t k);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_REFERENCE_TOPKV_H
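
Note for reviewers: below is a minimal standalone usage sketch of the new NETopKV function. It simply mirrors the configure -> allocate -> run flow used by TopKVValidationFixture::compute_target() above; the tensor shapes, the value of k, and the main() scaffolding are illustrative assumptions and are not part of this patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shapes: 10 classes x 4 samples of F32 scores, and 4 U32 target class ids.
    Tensor predictions, targets, output;
    predictions.allocator()->init(TensorInfo(TensorShape(10U, 4U), 1, DataType::F32));
    targets.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::U32));

    // Configure before allocating, as the validation fixture does; output is initialised by configure().
    NETopKV topkv;
    topkv.configure(&predictions, &targets, &output, 3 /* k, illustrative */);

    predictions.allocator()->allocate();
    targets.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill predictions and targets ...

    // After run(), output is a U8 tensor of batch size: element i is 1 when the score of
    // targets[i] ranks within the top k scores of sample i (cf. reference::topkv above).
    topkv.run();

    return 0;
}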