learning-process · SinevArtem · Apr 14, 2026
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"
+#include "task/include/task.hpp"
+
+namespace sinev_a_mult_matrix_fox_algorithm {
+
+/// OpenMP task that multiplies two square row-major matrices, using a blocked
+/// (Fox-style) scheme for larger sizes and a direct triple loop for small ones.
+class SinevAMultMatrixFoxAlgorithmOMP : public BaseTask {
+ public:
+  /// Creates the task and stores a copy of `in` as its input.
+  explicit SinevAMultMatrixFoxAlgorithmOMP(const InType &in);
+
+  /// Task kind of this implementation: always `kOMP`.
+  static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() {
+    return ppc::task::TypeOfTask::kOMP;
+  }
+
+ private:
+  // Pipeline stages overridden from BaseTask.
+  bool ValidationImpl() override;
+  bool PreProcessingImpl() override;
+  bool RunImpl() override;
+  bool PostProcessingImpl() override;
+
+  /// Writes the product of the row-major n x n matrices `a` and `b` into `c`.
+  static void SimpleMultiply(size_t n, const std::vector<double> &a, const std::vector<double> &b,
+                             std::vector<double> &c);
+
+  /// Accumulates one step of the block product into `blocks_c`: for every block
+  /// row i and column j, C(i, j) += A(i, k) * B(k, j) with k = (i + step) % q.
+  static void FoxStep(const std::vector<double> &blocks_a, const std::vector<double> &blocks_b,
+                      std::vector<double> &blocks_c, size_t bs, int q, int step);
+
+  /// Copies the row-major n x n matrix `src` into `dst` as q x q contiguous bs x bs blocks.
+  static void DecomposeToBlocks(const std::vector<double> &src, std::vector<double> &dst, size_t n, size_t bs, int q);
+
+  /// Inverse of DecomposeToBlocks: copies block-ordered `src` back into row-major `dst`.
+  static void AssembleFromBlocks(const std::vector<double> &src, std::vector<double> &dst, size_t n, size_t bs, int q);
+};
+
+}  // namespace sinev_a_mult_matrix_fox_algorithm
@@ -0,0 +1,151 @@
+#include "sinev_a_mult_matrix_fox_algorithm/omp/include/ops_omp.hpp"
+
+#include <omp.h>
+
+#include <cmath>
+#include <cstddef>
+#include <vector>
+
+#include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"
+
+namespace sinev_a_mult_matrix_fox_algorithm {
+
+SinevAMultMatrixFoxAlgorithmOMP::SinevAMultMatrixFoxAlgorithmOMP(const InType &in) {
+  // Register the task kind, keep a copy of the caller's input, and start with
+  // an empty output.
+  const auto task_kind = GetStaticTypeOfTask();
+  SetTypeOfTask(task_kind);
+
+  GetInput() = in;
+
+  GetOutput() = {};
+}
+
+bool SinevAMultMatrixFoxAlgorithmOMP::ValidationImpl() {
+  // The input is valid when the size is positive and both matrices hold
+  // exactly size * size elements.
+  const auto &[dim, lhs, rhs] = GetInput();
+  if (!(dim > 0)) {
+    return false;
+  }
+
+  const auto expected_count = dim * dim;
+  if (lhs.size() != expected_count) {
+    return false;
+  }
+  return rhs.size() == expected_count;
+}
+
+bool SinevAMultMatrixFoxAlgorithmOMP::PreProcessingImpl() {
+  // Size the output as a zero-filled size x size matrix; only the size field
+  // of the input is needed here.
+  const auto &dim = std::get<0>(GetInput());
+  GetOutput().assign(dim * dim, 0.0);
+  return true;
+}
+
+void SinevAMultMatrixFoxAlgorithmOMP::SimpleMultiply(size_t n, const std::vector<double> &a,
+                                                     const std::vector<double> &b, std::vector<double> &c) {
+  // Direct row-major product c = a * b. Each (row, col) output cell is computed
+  // independently, so the two outer loops are collapsed into one parallel range.
+#pragma omp parallel for default(none) shared(n, a, b, c) collapse(2)
+  for (size_t row = 0; row < n; ++row) {
+    for (size_t col = 0; col < n; ++col) {
+      const size_t row_base = row * n;
+      double acc = 0.0;
+      for (size_t inner = 0; inner < n; ++inner) {
+        acc += a[row_base + inner] * b[(inner * n) + col];
+      }
+      c[row_base + col] = acc;
+    }
+  }
+}
+
+void SinevAMultMatrixFoxAlgorithmOMP::DecomposeToBlocks(const std::vector<double> &src, std::vector<double> &dst,
+                                                        size_t n, size_t bs, int q) {
+  // Block (block_row, block_col) of the row-major n x n matrix `src` is stored
+  // contiguously in `dst` at block index block_row * q + block_col, row-major inside.
+#pragma omp parallel for default(none) shared(src, dst, n, bs, q) collapse(2)
+  for (int block_row = 0; block_row < q; ++block_row) {
+    for (int block_col = 0; block_col < q; ++block_col) {
+      const size_t cells_per_block = bs * bs;
+      const size_t dst_base = static_cast<size_t>((block_row * q) + block_col) * cells_per_block;
+      const size_t first_row = static_cast<size_t>(block_row) * bs;
+      const size_t first_col = static_cast<size_t>(block_col) * bs;
+
+      for (size_t r = 0; r < bs; ++r) {
+        // Start of row r of this block in the source matrix and in the block buffer.
+        const size_t src_row = ((first_row + r) * n) + first_col;
+        const size_t dst_row = dst_base + (r * bs);
+        for (size_t col = 0; col < bs; ++col) {
+          dst[dst_row + col] = src[src_row + col];
+        }
+      }
+    }
+  }
+}
+
+void SinevAMultMatrixFoxAlgorithmOMP::AssembleFromBlocks(const std::vector<double> &src, std::vector<double> &dst,
+                                                         size_t n, size_t bs, int q) {
+  // Reverse of the block layout: block (block_row, block_col), stored contiguously
+  // in `src` at block index block_row * q + block_col, is copied back to its
+  // position in the row-major n x n matrix `dst`.
+#pragma omp parallel for default(none) shared(src, dst, n, bs, q) collapse(2)
+  for (int block_row = 0; block_row < q; ++block_row) {
+    for (int block_col = 0; block_col < q; ++block_col) {
+      const size_t cells_per_block = bs * bs;
+      const size_t src_base = static_cast<size_t>((block_row * q) + block_col) * cells_per_block;
+      const size_t first_row = static_cast<size_t>(block_row) * bs;
+      const size_t first_col = static_cast<size_t>(block_col) * bs;
+
+      for (size_t r = 0; r < bs; ++r) {
+        // Start of row r of this block in the block buffer and in the output matrix.
+        const size_t src_row = src_base + (r * bs);
+        const size_t dst_row = ((first_row + r) * n) + first_col;
+        for (size_t col = 0; col < bs; ++col) {
+          dst[dst_row + col] = src[src_row + col];
+        }
+      }
+    }
+  }
+}
+
+void SinevAMultMatrixFoxAlgorithmOMP::FoxStep(const std::vector<double> &blocks_a, const std::vector<double> &blocks_b,
+                                              std::vector<double> &blocks_c, size_t bs, int q, int step) {
+  // Number of elements (not bytes) in one bs x bs block.
+  const size_t block_elems = bs * bs;
+  // Every (row, col) pair updates its own C block, so iterations never write
+  // to the same output elements.
+#pragma omp parallel for default(none) shared(blocks_a, blocks_b, blocks_c, bs, q, step, block_elems) collapse(2)
+  for (int row = 0; row < q; ++row) {
+    for (int col = 0; col < q; ++col) {
+      // Column of the A block (and row of the B block) paired at this step.
+      const int pivot = (row + step) % q;
+
+      const size_t a_base = static_cast<size_t>((row * q) + pivot) * block_elems;
+      const size_t b_base = static_cast<size_t>((pivot * q) + col) * block_elems;
+      const size_t c_base = static_cast<size_t>((row * q) + col) * block_elems;
+
+      // C(row, col) += A(row, pivot) * B(pivot, col); the innermost loop walks
+      // consecutive elements of one B row and one C row.
+      for (size_t r = 0; r < bs; ++r) {
+        const size_t a_row = a_base + (r * bs);
+        const size_t c_row = c_base + (r * bs);
+        for (size_t mid = 0; mid < bs; ++mid) {
+          const double a_val = blocks_a[a_row + mid];
+          const size_t b_row = b_base + (mid * bs);
+          for (size_t t = 0; t < bs; ++t) {
+            blocks_c[c_row + t] += a_val * blocks_b[b_row + t];
+          }
+        }
+      }
+    }
+  }
+}
+
+bool SinevAMultMatrixFoxAlgorithmOMP::RunImpl() {
+  const auto &task_input = GetInput();
+  const size_t n = std::get<0>(task_input);
+  const auto &lhs = std::get<1>(task_input);
+  const auto &rhs = std::get<2>(task_input);
+  auto &product = GetOutput();
+
+  // For small matrices use the simple multiplication.
+  if (n <= 8) {
+    SimpleMultiply(n, lhs, rhs, product);
+    return true;
+  }
+
+  // Block side: the largest divisor of n that does not exceed floor(sqrt(n)).
+  // n > 8 on this path, so the starting candidate is positive and the search
+  // ends at 1 at the latest (1 divides every n).
+  auto bs = static_cast<size_t>(std::sqrt(static_cast<double>(n)));
+  while (bs > 1 && n % bs != 0) {
+    --bs;
+  }
+
+  // Number of blocks along one side of the matrix.
+  const int grid = static_cast<int>(n / bs);
+  const auto grid_side = static_cast<size_t>(grid);
+  const size_t storage = (grid_side * grid_side) * (bs * bs);
+
+  std::vector<double> blocks_a(storage);
+  std::vector<double> blocks_b(storage);
+  std::vector<double> blocks_c(storage, 0.0);
+
+  DecomposeToBlocks(lhs, blocks_a, n, bs, grid);
+  DecomposeToBlocks(rhs, blocks_b, n, bs, grid);
+
+  // One accumulation pass per block column of A; after `grid` passes every
+  // C block has received all of its partial products.
+  int step = 0;
+  while (step < grid) {
+    FoxStep(blocks_a, blocks_b, blocks_c, bs, grid, step);
+    ++step;
+  }
+
+  AssembleFromBlocks(blocks_c, product, n, bs, grid);
+
+  return true;
+}
+
+bool SinevAMultMatrixFoxAlgorithmOMP::PostProcessingImpl() {
+  return true;  // no post-processing work is done here; always reports success
+}
+
+}  // namespace sinev_a_mult_matrix_fox_algorithm
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"
+#include "sinev_a_mult_matrix_fox_algorithm/omp/include/ops_omp.hpp"
 #include "sinev_a_mult_matrix_fox_algorithm/seq/include/ops_seq.hpp"
 #include "util/include/func_test_util.hpp"
 #include "util/include/util.hpp"
@@ -92,13 +93,19 @@ TEST_P(SinevARunFuncTestsThreads, MatMulFoxAlg) {
   ExecuteTest(GetParam());
 }
 
-const std::array<TestType, 7> kTestParams = {std::make_tuple(1, "size_1x1"), std::make_tuple(2, "size_2x2"),
-                                             std::make_tuple(3, "size_3x3"), std::make_tuple(4, "size_4x4"),
-                                             std::make_tuple(5, "size_5x5"), std::make_tuple(6, "size_6x6"),
-                                             std::make_tuple(7, "size_7x7")};
+const std::array<TestType, 13> kTestParams = {  // (n, label) pairs; each label spells the size as "size_NxN"
+    std::make_tuple(1, "size_1x1"),      std::make_tuple(2, "size_2x2"),    std::make_tuple(18, "size_18x18"),
+    std::make_tuple(4, "size_4x4"),      std::make_tuple(25, "size_25x25"), std::make_tuple(6, "size_6x6"),
+    std::make_tuple(75, "size_75x75"),   std::make_tuple(8, "size_8x8"),    std::make_tuple(9, "size_9x9"),
+    std::make_tuple(10, "size_10x10"),   std::make_tuple(16, "size_16x16"), std::make_tuple(50, "size_50x50"),
+    std::make_tuple(100, "size_100x100")};
 
-const auto kTestTasksList = ppc::util::AddFuncTask<SinevAMultMatrixFoxAlgorithmSEQ, InType>(
-    kTestParams, PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm);
+const auto kTestTasksList = std::tuple_cat(ppc::util::AddFuncTask<SinevAMultMatrixFoxAlgorithmOMP, InType>(
+                                               kTestParams, PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm),
+                                           ppc::util::AddFuncTask<SinevAMultMatrixFoxAlgorithmSEQ, InType>(
+                                               kTestParams, PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm)
+
+);  // OMP tasks first, then SEQ tasks, both built from the same kTestParams
 
 const auto kGtestValues = ppc::util::ExpandToValues(kTestTasksList);
 

@@ -6,12 +6,12 @@
 #include <vector>
 
 #include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"
+#include "sinev_a_mult_matrix_fox_algorithm/omp/include/ops_omp.hpp"
 #include "sinev_a_mult_matrix_fox_algorithm/seq/include/ops_seq.hpp"
 #include "util/include/perf_test_util.hpp"
 
 namespace sinev_a_mult_matrix_fox_algorithm {
 
-// ИСПРАВЛЕНО: Уникальное имя класса
 class SinevAPerformanceTest : public ppc::util::BaseRunPerfTests<InType, OutType> {
   InType input_data_;
   std::vector<double> expected_output_;
@@ -82,8 +82,9 @@ TEST_P(SinevAPerformanceTest, RunPerfModes) {
 
 namespace {
 
-const auto kAllPerfTasks = ppc::util::MakeAllPerfTasks<InType, SinevAMultMatrixFoxAlgorithmSEQ>(
-    PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm);
+const auto kAllPerfTasks =  // perf tasks are generated for both the SEQ and the OMP implementation
+    ppc::util::MakeAllPerfTasks<InType, SinevAMultMatrixFoxAlgorithmSEQ, SinevAMultMatrixFoxAlgorithmOMP>(
+        PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm);
 
 const auto kGtestValues = ppc::util::TupleToGTestValues(kAllPerfTasks);