Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions tasks/sinev_a_mult_matrix_fox_algorithm/omp/include/ops_omp.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#pragma once

#include <cstddef>
#include <vector>

#include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"
#include "task/include/task.hpp"

namespace sinev_a_mult_matrix_fox_algorithm {

// OpenMP task that multiplies two square matrices stored as flat, row-major
// std::vector<double> buffers. The input is a (size, A, B) triple; the output is
// the size*size product. Small matrices are multiplied directly, larger ones are
// split into square blocks and accumulated block by block (see RunImpl).
class SinevAMultMatrixFoxAlgorithmOMP : public BaseTask {
public:
// Task kind of this implementation: always TypeOfTask::kOMP.
static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() {
return ppc::task::TypeOfTask::kOMP;
}
// Stores `in` as the task input and assigns an empty value to the output.
explicit SinevAMultMatrixFoxAlgorithmOMP(const InType &in);

private:
// True when size > 0 and both matrices hold exactly size*size elements.
bool ValidationImpl() override;
// Replaces the output with size*size zeros.
bool PreProcessingImpl() override;
// Computes the product into the output; always returns true.
bool RunImpl() override;
// No post-processing is performed; always returns true.
bool PostProcessingImpl() override;

// c = a * b for n x n row-major matrices; every element of c is overwritten.
static void SimpleMultiply(size_t n, const std::vector<double> &a, const std::vector<double> &b,
std::vector<double> &c);
// Copies the n x n row-major matrix `src` into `dst` as a q x q grid of bs x bs
// blocks, each block stored contiguously (row-major inside the block).
// `dst` is indexed, not resized, so the caller must size it beforehand.
static void DecomposeToBlocks(const std::vector<double> &src, std::vector<double> &dst, size_t n, size_t bs, int q);
// Inverse of DecomposeToBlocks: copies the block layout `src` back into the
// n x n row-major matrix `dst`. `dst` must already be sized by the caller.
static void AssembleFromBlocks(const std::vector<double> &src, std::vector<double> &dst, size_t n, size_t bs, int q);
// One accumulation step: for every block (i, j) adds A(i, k) * B(k, j) into
// C(i, j), where k = (i + step) % q. Existing contents of blocks_c are kept.
static void FoxStep(const std::vector<double> &blocks_a, const std::vector<double> &blocks_b,
std::vector<double> &blocks_c, size_t bs, int q, int step);
};

} // namespace sinev_a_mult_matrix_fox_algorithm
151 changes: 151 additions & 0 deletions tasks/sinev_a_mult_matrix_fox_algorithm/omp/src/ops_omp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#include "sinev_a_mult_matrix_fox_algorithm/omp/include/ops_omp.hpp"

#include <omp.h>

#include <cmath>
#include <cstddef>
#include <vector>

#include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"

namespace sinev_a_mult_matrix_fox_algorithm {

// Constructs the task: registers the task type returned by GetStaticTypeOfTask(),
// assigns `in` to the task input, and assigns an empty value to the output.
SinevAMultMatrixFoxAlgorithmOMP::SinevAMultMatrixFoxAlgorithmOMP(const InType &in) {
SetTypeOfTask(GetStaticTypeOfTask());
GetInput() = in;
GetOutput() = {};
}

// Validates the input triple (order, A, B): the order must be positive and both
// matrices must contain exactly order * order elements.
bool SinevAMultMatrixFoxAlgorithmOMP::ValidationImpl() {
  const auto &[order, lhs, rhs] = GetInput();

  if (!(order > 0)) {
    return false;
  }

  // Element count every operand of a square order x order matrix must have.
  const auto expected_count = order * order;
  if (lhs.size() != expected_count) {
    return false;
  }
  return rhs.size() == expected_count;
}

// Prepares the output buffer: order * order elements, all set to 0.0.
// Only the matrix order is read from the input here.
bool SinevAMultMatrixFoxAlgorithmOMP::PreProcessingImpl() {
  const auto order = std::get<0>(GetInput());
  const auto element_count = order * order;
  GetOutput() = std::vector<double>(element_count, 0.0);
  return true;
}

// Direct triple-loop product c = a * b for n x n row-major matrices.
// The (row, col) iteration space is flattened and shared between OpenMP threads;
// each iteration writes exactly one element of c, which is overwritten (not
// accumulated). `c` is indexed only, so it must already hold n * n elements.
void SinevAMultMatrixFoxAlgorithmOMP::SimpleMultiply(size_t n, const std::vector<double> &a,
                                                     const std::vector<double> &b, std::vector<double> &c) {
#pragma omp parallel for default(none) shared(n, a, b, c) collapse(2)
  for (size_t row = 0; row < n; ++row) {
    for (size_t col = 0; col < n; ++col) {
      // Walk along row `row` of a and down column `col` of b.
      size_t a_pos = row * n;
      size_t b_pos = col;
      double acc = 0.0;
      for (size_t t = 0; t < n; ++t) {
        acc += a[a_pos] * b[b_pos];
        ++a_pos;
        b_pos += n;
      }
      c[(row * n) + col] = acc;
    }
  }
}

// Repacks the n x n row-major matrix `src` into block layout: a q x q grid of
// bs x bs blocks, where block (block_row, block_col) occupies the contiguous
// range starting at (block_row * q + block_col) * bs * bs and is row-major inside.
// `dst` is indexed only, so the caller must have sized it already.
void SinevAMultMatrixFoxAlgorithmOMP::DecomposeToBlocks(const std::vector<double> &src, std::vector<double> &dst,
                                                        size_t n, size_t bs, int q) {
#pragma omp parallel for default(none) shared(src, dst, n, bs, q) collapse(2)
  for (int block_row = 0; block_row < q; ++block_row) {
    for (int block_col = 0; block_col < q; ++block_col) {
      const int block_id = (block_row * q) + block_col;
      // Top-left corner of this block inside the full matrix.
      const size_t first_row = static_cast<size_t>(block_row) * bs;
      const size_t first_col = static_cast<size_t>(block_col) * bs;

      // Elements of a block are written back-to-back, so one cursor suffices.
      size_t write_pos = static_cast<size_t>(block_id) * (bs * bs);
      for (size_t r = 0; r < bs; ++r) {
        size_t read_pos = ((first_row + r) * n) + first_col;
        for (size_t c = 0; c < bs; ++c) {
          dst[write_pos] = src[read_pos];
          ++write_pos;
          ++read_pos;
        }
      }
    }
  }
}

// Inverse of the block repacking: reads the q x q grid of contiguous bs x bs
// blocks from `src` and scatters them into the n x n row-major matrix `dst`.
// `dst` is indexed only, so the caller must have sized it already.
void SinevAMultMatrixFoxAlgorithmOMP::AssembleFromBlocks(const std::vector<double> &src, std::vector<double> &dst,
                                                         size_t n, size_t bs, int q) {
#pragma omp parallel for default(none) shared(src, dst, n, bs, q) collapse(2)
  for (int block_row = 0; block_row < q; ++block_row) {
    for (int block_col = 0; block_col < q; ++block_col) {
      const int block_id = (block_row * q) + block_col;
      // Top-left corner of this block inside the full matrix.
      const size_t first_row = static_cast<size_t>(block_row) * bs;
      const size_t first_col = static_cast<size_t>(block_col) * bs;

      // Elements of a block are read back-to-back, so one cursor suffices.
      size_t read_pos = static_cast<size_t>(block_id) * (bs * bs);
      for (size_t r = 0; r < bs; ++r) {
        size_t write_pos = ((first_row + r) * n) + first_col;
        for (size_t c = 0; c < bs; ++c) {
          dst[write_pos] = src[read_pos];
          ++write_pos;
          ++read_pos;
        }
      }
    }
  }
}

// Performs one accumulation step over the whole block grid. For every block
// position (row, col) the block A(row, pivot) is multiplied by B(pivot, col) and
// added into C(row, col), with pivot = (row + step) % q. Each (row, col)
// iteration writes only its own C block, so the flattened loop nest is shared
// between OpenMP threads without further synchronisation.
void SinevAMultMatrixFoxAlgorithmOMP::FoxStep(const std::vector<double> &blocks_a, const std::vector<double> &blocks_b,
                                              std::vector<double> &blocks_c, size_t bs, int q, int step) {
  // Number of doubles in one bs x bs block (an element count, not a byte size).
  const size_t elems_per_block = bs * bs;
#pragma omp parallel for default(none) shared(blocks_a, blocks_b, blocks_c, bs, q, step, elems_per_block) collapse(2)
  for (int row = 0; row < q; ++row) {
    for (int col = 0; col < q; ++col) {
      const int pivot = (row + step) % q;

      const double *a_block = blocks_a.data() + (static_cast<size_t>((row * q) + pivot) * elems_per_block);
      const double *b_block = blocks_b.data() + (static_cast<size_t>((pivot * q) + col) * elems_per_block);
      double *c_block = blocks_c.data() + (static_cast<size_t>((row * q) + col) * elems_per_block);

      // i-k-j ordering: the innermost loop streams along rows of B and C.
      for (size_t r = 0; r < bs; ++r) {
        double *c_row = c_block + (r * bs);
        for (size_t t = 0; t < bs; ++t) {
          const double scale = a_block[(r * bs) + t];
          const double *b_row = b_block + (t * bs);
          for (size_t c = 0; c < bs; ++c) {
            c_row[c] += scale * b_row[c];
          }
        }
      }
    }
  }
}

// Computes the matrix product into the task output.
//  - Matrices of order <= 8 are multiplied directly with SimpleMultiply.
//  - Otherwise the block size is the largest divisor of n not exceeding
//    floor(sqrt(n)); both operands are repacked into block layout, FoxStep is
//    run once per block column, and the result is unpacked into the output.
// Always returns true.
bool SinevAMultMatrixFoxAlgorithmOMP::RunImpl() {
  const auto &[order, lhs, rhs] = GetInput();
  const size_t n = order;
  auto &product = GetOutput();

  // For small matrices use simple multiplication.
  constexpr size_t kDirectMultiplyLimit = 8;
  if (n <= kDirectMultiplyLimit) {
    SimpleMultiply(n, lhs, rhs, product);
    return true;
  }

  // Search downwards from floor(sqrt(n)) for a divisor of n. Since n > 8 here the
  // starting value is at least 1, and 1 always divides n, so the search stops.
  size_t bs = static_cast<size_t>(std::sqrt(static_cast<double>(n)));
  while (bs > 1 && n % bs != 0) {
    --bs;
  }

  const int grid_dim = static_cast<int>(n / bs);
  const size_t block_count = static_cast<size_t>(grid_dim) * static_cast<size_t>(grid_dim);
  const size_t storage = block_count * (bs * bs);

  std::vector<double> packed_a(storage);
  std::vector<double> packed_b(storage);
  std::vector<double> packed_c(storage, 0.0);

  DecomposeToBlocks(lhs, packed_a, n, bs, grid_dim);
  DecomposeToBlocks(rhs, packed_b, n, bs, grid_dim);

  // One step per block column: each step adds one A-block * B-block product
  // into every C block.
  int step = 0;
  while (step < grid_dim) {
    FoxStep(packed_a, packed_b, packed_c, bs, grid_dim, step);
    ++step;
  }

  AssembleFromBlocks(packed_c, product, n, bs, grid_dim);
  return true;
}

// No post-processing is done here: the function only reports success.
bool SinevAMultMatrixFoxAlgorithmOMP::PostProcessingImpl() {
return true;
}

} // namespace sinev_a_mult_matrix_fox_algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <vector>

#include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"
#include "sinev_a_mult_matrix_fox_algorithm/omp/include/ops_omp.hpp"
#include "sinev_a_mult_matrix_fox_algorithm/seq/include/ops_seq.hpp"
#include "util/include/func_test_util.hpp"
#include "util/include/util.hpp"
Expand Down Expand Up @@ -92,13 +93,19 @@ TEST_P(SinevARunFuncTestsThreads, MatMulFoxAlg) {
ExecuteTest(GetParam());
}

const std::array<TestType, 7> kTestParams = {std::make_tuple(1, "size_1x1"), std::make_tuple(2, "size_2x2"),
std::make_tuple(3, "size_3x3"), std::make_tuple(4, "size_4x4"),
std::make_tuple(5, "size_5x5"), std::make_tuple(6, "size_6x6"),
std::make_tuple(7, "size_7x7")};
const std::array<TestType, 13> kTestParams = {
std::make_tuple(1, "size_1x1"), std::make_tuple(2, "size_2x2"), std::make_tuple(18, "size_18x18"),
std::make_tuple(4, "size_4x4"), std::make_tuple(25, "size_25x25"), std::make_tuple(6, "size_6x6"),
std::make_tuple(75, "size_75x75"), std::make_tuple(8, "size_8x8"), std::make_tuple(9, "size_9x9"),
std::make_tuple(10, "size_10x10"), std::make_tuple(16, "size_16x16"), std::make_tuple(50, "size_50x50"),
std::make_tuple(100, "size_100x100")};

const auto kTestTasksList = ppc::util::AddFuncTask<SinevAMultMatrixFoxAlgorithmSEQ, InType>(
kTestParams, PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm);
const auto kTestTasksList = std::tuple_cat(ppc::util::AddFuncTask<SinevAMultMatrixFoxAlgorithmOMP, InType>(
kTestParams, PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm),
ppc::util::AddFuncTask<SinevAMultMatrixFoxAlgorithmSEQ, InType>(
kTestParams, PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm)

);

const auto kGtestValues = ppc::util::ExpandToValues(kTestTasksList);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
#include <vector>

#include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"
#include "sinev_a_mult_matrix_fox_algorithm/omp/include/ops_omp.hpp"
#include "sinev_a_mult_matrix_fox_algorithm/seq/include/ops_seq.hpp"
#include "util/include/perf_test_util.hpp"

namespace sinev_a_mult_matrix_fox_algorithm {

// ИСПРАВЛЕНО: Уникальное имя класса
class SinevAPerformanceTest : public ppc::util::BaseRunPerfTests<InType, OutType> {
InType input_data_;
std::vector<double> expected_output_;
Expand Down Expand Up @@ -82,8 +82,9 @@ TEST_P(SinevAPerformanceTest, RunPerfModes) {

namespace {

const auto kAllPerfTasks = ppc::util::MakeAllPerfTasks<InType, SinevAMultMatrixFoxAlgorithmSEQ>(
PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm);
const auto kAllPerfTasks =
ppc::util::MakeAllPerfTasks<InType, SinevAMultMatrixFoxAlgorithmSEQ, SinevAMultMatrixFoxAlgorithmOMP>(
PPC_SETTINGS_sinev_a_mult_matrix_fox_algorithm);

const auto kGtestValues = ppc::util::TupleToGTestValues(kAllPerfTasks);

Expand Down
Loading