From 2970a07c90f448f6c28bb8df5fa11f321c5d8395 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 3 Apr 2026 13:51:52 -0500 Subject: [PATCH 01/53] Migrate RMM usage to CCCL MR design Remove dependency on rmm::mr::device_memory_resource base class. Resources now satisfy the cuda::mr::resource concept directly. - Replace shared_ptr with value types and cuda::mr::any_resource for type-erased storage - Replace set_current_device_resource(ptr) with set_current_device_resource_ref - Replace set_per_device_resource(id, ptr) with set_per_device_resource_ref - Remove make_owning_wrapper usage - Remove dynamic_cast on memory resources (no common base class) - Remove owning_wrapper.hpp and device_memory_resource.hpp includes - Add missing thrust/iterator/transform_output_iterator.h include (no longer transitively included via CCCL) --- cpp/cuopt_cli.cpp | 7 +++--- cpp/src/barrier/sparse_cholesky.cuh | 4 ++-- .../infeasibility_information.cu | 2 ++ cpp/src/routing/ges_solver.cu | 2 -- cpp/src/utilities/cuda_helpers.cuh | 20 +---------------- cpp/tests/mip/load_balancing_test.cu | 4 ++-- cpp/tests/mip/multi_probe_test.cu | 4 ++-- cpp/tests/utilities/base_fixture.hpp | 22 +++++++++---------- 8 files changed, 24 insertions(+), 41 deletions(-) diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index 899a3118b3..2cb18cc1c5 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -376,7 +376,7 @@ int main(int argc, char* argv[]) // Only initialize CUDA resources if using GPU memory backend (not remote execution) auto memory_backend = cuopt::linear_programming::get_memory_backend_type(); - std::vector> memory_resources; + std::vector memory_resources; if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) { // All arguments are parsed as string, default values are parsed as int if unused. @@ -384,10 +384,11 @@ int main(int argc, char* argv[]) ? 
std::stoi(program.get("--num-gpus")) : program.get("--num-gpus"); + memory_resources.reserve(std::min(raft::device_setter::get_device_count(), num_gpus)); for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { RAFT_CUDA_TRY(cudaSetDevice(i)); - memory_resources.push_back(make_async()); - rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get()); + memory_resources.emplace_back(); + rmm::mr::set_per_device_resource_ref(rmm::cuda_device_id{i}, memory_resources.back()); } RAFT_CUDA_TRY(cudaSetDevice(0)); } diff --git a/cpp/src/barrier/sparse_cholesky.cuh b/cpp/src/barrier/sparse_cholesky.cuh index f7938fb989..52fea89502 100644 --- a/cpp/src/barrier/sparse_cholesky.cuh +++ b/cpp/src/barrier/sparse_cholesky.cuh @@ -247,8 +247,8 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { CUDSS_CALL_AND_CHECK_EXIT(cudssSetStream(handle, stream), status, "cudaStreamCreate"); mem_handler.ctx = reinterpret_cast(handle_ptr_->get_workspace_resource()); - mem_handler.device_alloc = cudss_device_alloc; - mem_handler.device_free = cudss_device_dealloc; + mem_handler.device_alloc = cudss_device_alloc; + mem_handler.device_free = cudss_device_dealloc; CUDSS_CALL_AND_CHECK_EXIT( cudssSetDeviceMemHandler(handle, &mem_handler), status, "cudssSetDeviceMemHandler"); diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index dbb35b732d..51b702eae1 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -15,6 +15,8 @@ #include +#include + #include #include #include diff --git a/cpp/src/routing/ges_solver.cu b/cpp/src/routing/ges_solver.cu index 194f73b99e..a660f84909 100644 --- a/cpp/src/routing/ges_solver.cu +++ b/cpp/src/routing/ges_solver.cu @@ -16,8 +16,6 @@ #include "adapters/assignment_adapter.cuh" #include 
"ges/guided_ejection_search.cuh" -#include - namespace cuopt { namespace routing { diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index 946099648d..80e7b730db 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -16,8 +16,6 @@ #include #include #include -#include -#include #include namespace cuopt { @@ -216,25 +214,9 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size) inline size_t get_device_memory_size() { - // Otherwise, we need to get the free memory from the device size_t free_mem, total_mem; cudaMemGetInfo(&free_mem, &total_mem); - - auto res = rmm::mr::get_current_device_resource(); - auto limiting_adaptor = - dynamic_cast*>(res); - // Did we specifiy an explicit memory limit? - if (limiting_adaptor) { - printf("limiting_adaptor->get_allocation_limit(): %fMiB\n", - limiting_adaptor->get_allocation_limit() / (double)1e6); - printf("used_mem: %fMiB\n", limiting_adaptor->get_allocated_bytes() / (double)1e6); - printf("free_mem: %fMiB\n", - (limiting_adaptor->get_allocation_limit() - limiting_adaptor->get_allocated_bytes()) / - (double)1e6); - return std::min(total_mem, limiting_adaptor->get_allocation_limit()); - } else { - return total_mem; - } + return total_mem; } } // namespace cuopt diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 5e2f08007d..991a3072c3 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -32,7 +32,7 @@ namespace cuopt::linear_programming::test { -inline auto make_async() { return std::make_shared(); } +inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); } void init_handler(const raft::handle_t* handle_ptr) { @@ -119,7 +119,7 @@ bounds_probe_results(detail::bound_presolve_t& bnd_prb_0, void test_multi_probe(std::string path) { auto memory_resource = make_async(); - rmm::mr::set_current_device_resource(memory_resource.get()); + 
rmm::mr::set_current_device_resource_ref(memory_resource); const raft::handle_t handle_{}; cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 073c153486..2910cb4a44 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -31,7 +31,7 @@ namespace cuopt::linear_programming::test { -inline auto make_async() { return std::make_shared(); } +inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); } void init_handler(const raft::handle_t* handle_ptr) { @@ -141,7 +141,7 @@ multi_probe_results( void test_multi_probe(std::string path) { auto memory_resource = make_async(); - rmm::mr::set_current_device_resource(memory_resource.get()); + rmm::mr::set_current_device_resource_ref(memory_resource); const raft::handle_t handle_{}; cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index abc69627df..c9c15ae04d 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -13,11 +13,12 @@ #include +#include + #include #include #include #include -#include #include #include @@ -25,18 +26,18 @@ namespace cuopt { namespace test { /// MR factory functions -inline auto make_cuda() { return std::make_shared(); } +inline auto make_cuda() { return rmm::mr::cuda_memory_resource(); } -inline auto make_async() { return std::make_shared(); } +inline auto make_async() { return rmm::mr::cuda_async_memory_resource(); } -inline auto make_managed() { return std::make_shared(); } +inline auto make_managed() { return rmm::mr::managed_memory_resource(); } inline auto make_pool() { // 1GB of initial pool size const size_t initial_pool_size = 1024 * 1024 * 1024; - return rmm::mr::make_owning_wrapper(make_async(), - initial_pool_size); + auto upstream = make_async(); + return rmm::mr::pool_memory_resource(upstream, initial_pool_size); } inline auto make_binning() @@ -44,8 +45,7 @@ inline auto make_binning() auto pool = make_pool(); // Add a fixed_size_memory_resource for bins of size 256, 512, 1024, 2048 and // 4096KiB Larger allocations will use the pool resource - auto mr = rmm::mr::make_owning_wrapper(pool, 18, 22); - return mr; + return rmm::mr::binning_memory_resource(pool, 18, 22); } /** @@ -62,7 +62,7 @@ inline auto make_binning() * Accepted types are "pool", "cuda", and "managed" only. 
* @return Memory resource instance */ -inline std::shared_ptr create_memory_resource( +inline cuda::mr::any_resource create_memory_resource( std::string const& allocation_mode) { if (allocation_mode == "binning") return make_binning(); @@ -120,6 +120,6 @@ inline auto parse_test_options(int argc, char** argv) auto const cmd_opts = parse_test_options(argc, argv); \ auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ auto resource = cuopt::test::create_memory_resource(rmm_mode); \ - rmm::mr::set_current_device_resource(resource.get()); \ + rmm::mr::set_current_device_resource_ref(resource); \ return RUN_ALL_TESTS(); \ } From e77dbc2375092cc95b7d186e896a052f97e0e786 Mon Sep 17 00:00:00 2001 From: Nicolas Guidotti <224634272+nguidotti@users.noreply.github.com> Date: Fri, 13 Mar 2026 14:46:57 +0100 Subject: [PATCH 02/53] split worker and worker pool in separated file. code cleanup. --- cpp/src/branch_and_bound/branch_and_bound.cpp | 15 +- cpp/src/branch_and_bound/branch_and_bound.hpp | 6 +- .../deterministic_workers.hpp | 2 +- cpp/src/branch_and_bound/pseudo_costs.hpp | 2 +- ...branch_and_bound_worker.hpp => worker.hpp} | 124 +-------------- cpp/src/branch_and_bound/worker_pool.hpp | 141 ++++++++++++++++++ 6 files changed, 158 insertions(+), 132 deletions(-) rename cpp/src/branch_and_bound/{branch_and_bound_worker.hpp => worker.hpp} (59%) create mode 100644 cpp/src/branch_and_bound/worker_pool.hpp diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 1526baa367..d5aa998eca 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -6,6 +6,7 @@ /* clang-format on */ #include +#include #include #include @@ -35,15 +36,12 @@ #include #include #include -#include #include #include #include -#include #include namespace cuopt::linear_programming::dual_simplex { - namespace { template @@ -467,11 +465,9 @@ void branch_and_bound_t::set_new_solution(const std::vector& 
solu original_problem_, original_lp_, solution, new_slacks_, crushed_solution); f_t obj = compute_objective(original_lp_, crushed_solution); mutex_original_lp_.unlock(); - bool is_feasible = false; - bool attempt_repair = false; - mutex_upper_.lock(); + bool is_feasible = false; + bool attempt_repair = false; f_t current_upper_bound = upper_bound_; - mutex_upper_.unlock(); if (obj < current_upper_bound) { f_t primal_err; f_t bound_err; @@ -1184,6 +1180,9 @@ std::pair branch_and_bound_t::upd node_status_t status = node_status_t::PENDING; rounding_direction_t round_dir = rounding_direction_t::NONE; + worker->recompute_basis = true; + worker->recompute_bounds = true; + if (lp_status == dual::status_t::DUAL_UNBOUNDED) { node_ptr->lower_bound = inf; policy.graphviz(search_tree, node_ptr, "infeasible", 0.0); @@ -1243,6 +1242,8 @@ std::pair branch_and_bound_t::upd assert(dir != rounding_direction_t::NONE); policy.update_objective_estimate(node_ptr, leaf_fractional, leaf_solution.x); + worker->recompute_basis = false; + worker->recompute_bounds = false; logger_t log; log.log = false; diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 0d07cf12a5..60904406f7 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -8,12 +8,12 @@ #pragma once #include -#include #include -#include #include #include #include +#include +#include #include @@ -107,7 +107,7 @@ class branch_and_bound_t { } } - // Set a solution based on the user problem during the course of the solve + // Set a solution based on the user problem during solve time void set_new_solution(const std::vector& solution); // This queues the solution to be processed at the correct work unit timestamp diff --git a/cpp/src/branch_and_bound/deterministic_workers.hpp b/cpp/src/branch_and_bound/deterministic_workers.hpp index 7a074051c6..7ec231c514 100644 --- a/cpp/src/branch_and_bound/deterministic_workers.hpp 
+++ b/cpp/src/branch_and_bound/deterministic_workers.hpp @@ -8,9 +8,9 @@ #pragma once #include -#include #include #include +#include #include diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 009bd8b81a..37a4b75039 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -7,8 +7,8 @@ #pragma once -#include #include +#include #include #include diff --git a/cpp/src/branch_and_bound/branch_and_bound_worker.hpp b/cpp/src/branch_and_bound/worker.hpp similarity index 59% rename from cpp/src/branch_and_bound/branch_and_bound_worker.hpp rename to cpp/src/branch_and_bound/worker.hpp index 4de2b43cae..8dcb7a71a5 100644 --- a/cpp/src/branch_and_bound/branch_and_bound_worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -73,8 +73,8 @@ class branch_and_bound_worker_t { pcgenerator_t rng; - bool recompute_basis = true; - bool recompute_bounds = true; + bool recompute_basis = true; + bool recompute_bounds = true; branch_and_bound_worker_t(i_t worker_id, const lp_problem_t& original_lp, @@ -117,7 +117,7 @@ class branch_and_bound_worker_t { const simplex_solver_settings_t& settings) { internal_node = node->detach_copy(); - start_node = &internal_node; + start_node = &internal_node; start_lower = original_lp.lower; start_upper = original_lp.upper; @@ -130,7 +130,7 @@ class branch_and_bound_worker_t { return node_presolver.bounds_strengthening(settings, bounds_changed, start_lower, start_upper); } - // Set the variables bounds for the LP relaxation of the current node. + // Set the variables bounds for the LP relaxation in the current node. 
bool set_lp_variable_bounds(mip_node_t* node_ptr, const simplex_solver_settings_t& settings) { @@ -162,120 +162,4 @@ class branch_and_bound_worker_t { mip_node_t internal_node; }; -template -class branch_and_bound_worker_pool_t { - public: - void init(i_t num_workers, - const lp_problem_t& original_lp, - const csr_matrix_t& Arow, - const std::vector& var_type, - const simplex_solver_settings_t& settings) - { - workers_.resize(num_workers); - num_idle_workers_ = num_workers; - for (i_t i = 0; i < num_workers; ++i) { - workers_[i] = std::make_unique>( - i, original_lp, Arow, var_type, settings); - idle_workers_.push_front(i); - } - - is_initialized = true; - } - - // Here, we are assuming that the scheduler is the only - // thread that can retrieve/pop an idle worker. - branch_and_bound_worker_t* get_idle_worker() - { - std::lock_guard lock(mutex_); - if (idle_workers_.empty()) { - return nullptr; - } else { - i_t idx = idle_workers_.front(); - return workers_[idx].get(); - } - } - - // Here, we are assuming that the scheduler is the only - // thread that can retrieve/pop an idle worker. 
- void pop_idle_worker() - { - std::lock_guard lock(mutex_); - if (!idle_workers_.empty()) { - idle_workers_.pop_front(); - num_idle_workers_--; - } - } - - void return_worker_to_pool(branch_and_bound_worker_t* worker) - { - worker->is_active = false; - std::lock_guard lock(mutex_); - idle_workers_.push_back(worker->worker_id); - num_idle_workers_++; - } - - f_t get_lower_bound() - { - f_t lower_bound = std::numeric_limits::infinity(); - - if (is_initialized) { - for (i_t i = 0; i < workers_.size(); ++i) { - if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { - lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); - } - } - } - - return lower_bound; - } - - i_t num_idle_workers() { return num_idle_workers_; } - - private: - // Worker pool - std::vector>> workers_; - bool is_initialized = false; - - omp_mutex_t mutex_; - std::deque idle_workers_; - omp_atomic_t num_idle_workers_; -}; - -template -std::vector get_search_strategies( - diving_heuristics_settings_t settings) -{ - std::vector types; - types.reserve(num_search_strategies); - types.push_back(BEST_FIRST); - if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); } - if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); } - if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); } - if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); } - return types; -} - -template -std::array get_max_workers( - i_t num_workers, const std::vector& strategies) -{ - std::array max_num_workers; - max_num_workers.fill(0); - - i_t bfs_workers = std::max(strategies.size() == 1 ? 
num_workers : num_workers / 4, 1); - max_num_workers[BEST_FIRST] = bfs_workers; - - i_t diving_workers = (num_workers - bfs_workers); - i_t m = strategies.size() - 1; - - for (size_t i = 1, k = 0; i < strategies.size(); ++i) { - i_t start = (double)k * diving_workers / m; - i_t end = (double)(k + 1) * diving_workers / m; - max_num_workers[strategies[i]] = end - start; - ++k; - } - - return max_num_workers; -} - } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp new file mode 100644 index 0000000000..2396b88914 --- /dev/null +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -0,0 +1,141 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +class branch_and_bound_worker_pool_t { + public: + void init(i_t num_workers, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings) + { + workers_.resize(num_workers); + num_idle_workers_ = num_workers; + for (i_t i = 0; i < num_workers; ++i) { + workers_[i] = std::make_unique>( + i, original_lp, Arow, var_type, settings); + idle_workers_.push_front(i); + } + + is_initialized = true; + } + + // Here, we are assuming that the scheduler is the only + // thread that can retrieve/pop an idle worker. + branch_and_bound_worker_t* get_idle_worker() + { + std::lock_guard lock(mutex_); + if (idle_workers_.empty()) { + return nullptr; + } else { + i_t idx = idle_workers_.front(); + return workers_[idx].get(); + } + } + + // Here, we are assuming that the scheduler is the only + // thread that can retrieve/pop an idle worker. 
+ void pop_idle_worker() + { + std::lock_guard lock(mutex_); + if (!idle_workers_.empty()) { + idle_workers_.pop_front(); + num_idle_workers_--; + } + } + + void return_worker_to_pool(branch_and_bound_worker_t* worker) + { + worker->is_active = false; + std::lock_guard lock(mutex_); + idle_workers_.push_back(worker->worker_id); + num_idle_workers_++; + } + + f_t get_lower_bound() + { + f_t lower_bound = std::numeric_limits::infinity(); + + if (is_initialized) { + for (i_t i = 0; i < workers_.size(); ++i) { + if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { + lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); + } + } + } + + return lower_bound; + } + + i_t num_idle_workers() { return num_idle_workers_; } + + void broadcast_root_bounds_change() + { + if (is_initialized) { + for (i_t i = 0; i < workers_.size(); ++i) { + if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { + workers_[i]->start_bounds_updated = true; + } + } + } + } + + private: + // Worker pool + std::vector>> workers_; + bool is_initialized = false; + + omp_mutex_t mutex_; + std::deque idle_workers_; + omp_atomic_t num_idle_workers_; +}; + +template +std::vector get_search_strategies( + diving_heuristics_settings_t settings) +{ + std::vector types; + types.reserve(num_search_strategies); + types.push_back(BEST_FIRST); + if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); } + if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); } + if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); } + if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); } + return types; +} + +template +std::array get_max_workers( + i_t num_workers, const std::vector& strategies) +{ + std::array max_num_workers; + max_num_workers.fill(0); + + i_t bfs_workers = std::max(strategies.size() == 1 ? 
num_workers : num_workers / 4, 1); + max_num_workers[BEST_FIRST] = bfs_workers; + + i_t diving_workers = (num_workers - bfs_workers); + i_t m = strategies.size() - 1; + + for (size_t i = 1, k = 0; i < strategies.size(); ++i) { + i_t start = (double)k * diving_workers / m; + i_t end = (double)(k + 1) * diving_workers / m; + max_num_workers[strategies[i]] = end - start; + ++k; + } + + return max_num_workers; +} + +} // namespace cuopt::linear_programming::dual_simplex \ No newline at end of file From 62d0452a283d3130bfaa326cada6c4bab7276219 Mon Sep 17 00:00:00 2001 From: Nicolas Guidotti Date: Wed, 1 Apr 2026 17:08:44 +0200 Subject: [PATCH 03/53] simplified logic for pseudo cost (and its snapshot) for the regular and deterministic mode. Signed-off-by: Nicolas Guidotti <224634272+nguidotti@users.noreply.github.com> --- cpp/src/branch_and_bound/branch_and_bound.cpp | 41 +- cpp/src/branch_and_bound/constants.hpp | 31 ++ .../deterministic_workers.hpp | 20 +- .../branch_and_bound/diving_heuristics.cpp | 133 ++++- .../branch_and_bound/diving_heuristics.hpp | 8 +- cpp/src/branch_and_bound/mip_node.hpp | 10 +- cpp/src/branch_and_bound/pseudo_costs.cpp | 212 +++---- cpp/src/branch_and_bound/pseudo_costs.hpp | 524 +++++------------- cpp/src/branch_and_bound/worker.hpp | 19 +- cpp/src/utilities/omp_helpers.hpp | 14 + 10 files changed, 451 insertions(+), 561 deletions(-) create mode 100644 cpp/src/branch_and_bound/constants.hpp diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index d5aa998eca..b60548a525 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1031,7 +1031,9 @@ struct deterministic_bfs_policy_t const std::vector& fractional, const std::vector& x) override { - i_t var = this->worker.pc_snapshot.variable_selection(fractional, x); + logger_t log; + log.log = false; + i_t var = this->worker.pc_snapshot.variable_selection(fractional, x, log); auto dir = 
martin_criteria(x[var], this->bnb.root_relax_soln_.x[var]); return {var, dir}; } @@ -1040,8 +1042,10 @@ struct deterministic_bfs_policy_t const std::vector& fractional, const std::vector& x) override { + logger_t log; + log.log = false; node->objective_estimate = - this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound); + this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound, log); } void on_node_completed(mip_node_t* node, @@ -1106,25 +1110,23 @@ struct deterministic_diving_policy_t const std::vector& fractional, const std::vector& x) override { + logger_t log; + log.log = false; + switch (this->worker.diving_type) { case search_strategy_t::PSEUDOCOST_DIVING: - return this->worker.variable_selection_from_snapshot(fractional, x); + return pseudocost_diving( + this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); case search_strategy_t::LINE_SEARCH_DIVING: - if (this->worker.root_solution) { - logger_t log; - log.log = false; - return line_search_diving(fractional, x, *this->worker.root_solution, log); - } - return this->worker.variable_selection_from_snapshot(fractional, x); + return line_search_diving(fractional, x, *this->worker.root_solution, log); case search_strategy_t::GUIDED_DIVING: - return this->worker.guided_variable_selection(fractional, x); + return guided_diving( + this->worker.pc_snapshot, fractional, x, this->worker.incumbent_snapshot, log); case search_strategy_t::COEFFICIENT_DIVING: { - logger_t log; - log.log = false; - return coefficient_diving(this->bnb.original_lp_, + return coefficient_diving(this->worker.leaf_problem, fractional, x, this->bnb.var_up_locks_, @@ -1132,7 +1134,7 @@ struct deterministic_diving_policy_t log); } - default: return this->worker.variable_selection_from_snapshot(fractional, x); + default: CUOPT_LOG_ERROR("Invalid diving method!"); return {-1, rounding_direction_t::NONE}; } } @@ -3318,11 +3320,12 @@ template void 
branch_and_bound_t::deterministic_broadcast_snapshots( PoolT& pool, const std::vector& incumbent_snapshot) { - deterministic_snapshot_t snap; - snap.upper_bound = upper_bound_.load(); - snap.total_lp_iters = exploration_stats_.total_lp_iters.load(); - snap.incumbent = incumbent_snapshot; - snap.pc_snapshot = pc_.create_snapshot(); + deterministic_snapshot_t snap{ + .upper_bound = upper_bound_, + .pc_snapshot = pc_, + .incumbent = incumbent_snapshot, + .total_lp_iters = exploration_stats_.total_lp_iters, + }; for (auto& worker : pool) { worker.set_snapshots(snap); diff --git a/cpp/src/branch_and_bound/constants.hpp b/cpp/src/branch_and_bound/constants.hpp new file mode 100644 index 0000000000..bad176b3d2 --- /dev/null +++ b/cpp/src/branch_and_bound/constants.hpp @@ -0,0 +1,31 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +namespace cuopt::linear_programming::dual_simplex { + +constexpr int num_search_strategies = 5; + +// Indicate the search and variable selection algorithms used by each thread +// in B&B (See [1]). +// +// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, +// Berlin, 2007. doi: 10.14279/depositonce-1634. +enum search_strategy_t : int { + BEST_FIRST = 0, // Best-First + Plunging. + PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) + LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) + GUIDED_DIVING = 3, // Guided diving (9.2.3). 
+ COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) +}; + +enum class rounding_direction_t { NONE = -1, DOWN = 0, UP = 1 }; + +enum class branch_and_bound_mode_t { REGULAR = 0, DETERMINISTIC = 1 }; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/deterministic_workers.hpp b/cpp/src/branch_and_bound/deterministic_workers.hpp index 7ec231c514..acafe329f9 100644 --- a/cpp/src/branch_and_bound/deterministic_workers.hpp +++ b/cpp/src/branch_and_bound/deterministic_workers.hpp @@ -58,7 +58,7 @@ struct deterministic_snapshot_t { f_t upper_bound; pseudo_cost_snapshot_t pc_snapshot; std::vector incumbent; - i_t total_lp_iters; + int64_t total_lp_iters; }; template @@ -90,7 +90,7 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t { const std::vector& var_types, const simplex_solver_settings_t& settings, const std::string& context_name) - : base_t(id, original_lp, Arow, var_types, settings), work_context(context_name) + : base_t(id, original_lp, Arow, var_types, settings), work_context(context_name), pc_snapshot(1) { work_context.deterministic = true; } @@ -342,22 +342,6 @@ class deterministic_diving_worker_t {objective, solution, depth, this->worker_id, this->next_solution_seq++}); ++this->total_integer_solutions; } - - branch_variable_t variable_selection_from_snapshot(const std::vector& fractional, - const std::vector& solution) const - { - assert(root_solution != nullptr); - return this->pc_snapshot.pseudocost_diving(fractional, solution, *root_solution); - } - - branch_variable_t guided_variable_selection(const std::vector& fractional, - const std::vector& solution) const - { - if (this->incumbent_snapshot.empty()) { - return variable_selection_from_snapshot(fractional, solution); - } - return this->pc_snapshot.guided_diving(fractional, solution, this->incumbent_snapshot); - } }; template diff --git a/cpp/src/branch_and_bound/diving_heuristics.cpp b/cpp/src/branch_and_bound/diving_heuristics.cpp index 
f9791280a6..571027c1d7 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.cpp +++ b/cpp/src/branch_and_bound/diving_heuristics.cpp @@ -65,38 +65,117 @@ branch_variable_t line_search_diving(const std::vector& fractional, return {branch_var, round_dir}; } -template -branch_variable_t pseudocost_diving(pseudo_costs_t& pc, +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& root_solution, logger_t& log) { - return pseudocost_diving_from_arrays(pc.pseudo_cost_sum_down.data(), - pc.pseudo_cost_sum_up.data(), - pc.pseudo_cost_num_down.data(), - pc.pseudo_cost_num_up.data(), - (i_t)pc.pseudo_cost_sum_down.size(), - fractional, - solution, - root_solution); + const i_t num_fractional = fractional.size(); + if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; + + pseudo_cost_averages_t avgs = pc.compute_averages(); + + i_t branch_var = fractional[0]; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::DOWN; + constexpr f_t eps = f_t(1e-6); + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : avgs.down_avg; + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : avgs.up_avg; + + f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); + f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); + + f_t score = 0; + rounding_direction_t dir = rounding_direction_t::DOWN; + + f_t root_val = (j < static_cast(root_solution.size())) ? 
root_solution[j] : solution[j]; + + if (solution[j] < root_val - f_t(0.4)) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (solution[j] > root_val + f_t(0.4)) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (f_down < f_t(0.3)) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (f_down > f_t(0.7)) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (pc_down < pc_up + eps) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else { + score = score_up; + dir = rounding_direction_t::UP; + } + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + if (round_dir == rounding_direction_t::NONE) { + branch_var = fractional[0]; + round_dir = rounding_direction_t::DOWN; + } + + return {branch_var, round_dir}; } -template -branch_variable_t guided_diving(pseudo_costs_t& pc, +template +branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& incumbent, logger_t& log) { - return guided_diving_from_arrays(pc.pseudo_cost_sum_down.data(), - pc.pseudo_cost_sum_up.data(), - pc.pseudo_cost_num_down.data(), - pc.pseudo_cost_num_up.data(), - (i_t)pc.pseudo_cost_sum_down.size(), - fractional, - solution, - incumbent); + const i_t num_fractional = fractional.size(); + if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; + + pseudo_cost_averages_t avgs = pc.compute_averages(); + + i_t branch_var = fractional[0]; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::DOWN; + constexpr f_t eps = f_t(1e-6); + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); + f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); + rounding_direction_t dir = + down_dist < up_dist + 
eps ? rounding_direction_t::DOWN : rounding_direction_t::UP; + + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : avgs.down_avg; + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : avgs.up_avg; + f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; + f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; + f_t score = (score1 + score2) / 6; + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + return {branch_var, round_dir}; } template @@ -187,12 +266,26 @@ template branch_variable_t pseudocost_diving(pseudo_costs_t& p const std::vector& root_solution, logger_t& log); +template branch_variable_t pseudocost_diving( + pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + template branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& incumbent, logger_t& log); +template branch_variable_t guided_diving( + pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log); + template void calculate_variable_locks(const lp_problem_t& lp_problem, std::vector& up_locks, std::vector& down_locks); diff --git a/cpp/src/branch_and_bound/diving_heuristics.hpp b/cpp/src/branch_and_bound/diving_heuristics.hpp index dfeabe3a5f..325aa0b878 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.hpp +++ b/cpp/src/branch_and_bound/diving_heuristics.hpp @@ -22,15 +22,15 @@ branch_variable_t line_search_diving(const std::vector& fractional, const std::vector& root_solution, logger_t& log); -template -branch_variable_t pseudocost_diving(pseudo_costs_t& pc, +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const std::vector& 
fractional, const std::vector& solution, const std::vector& root_solution, logger_t& log); -template -branch_variable_t guided_diving(pseudo_costs_t& pc, +template +branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& incumbent, diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp index a24f67c3bc..cce23c3bd7 100644 --- a/cpp/src/branch_and_bound/mip_node.hpp +++ b/cpp/src/branch_and_bound/mip_node.hpp @@ -7,6 +7,8 @@ #pragma once +#include + #include #include @@ -29,9 +31,11 @@ enum class node_status_t : int { NUMERICAL = 5 // Encountered numerical issue when solving the LP relaxation }; -enum class rounding_direction_t : int8_t { NONE = -1, DOWN = 0, UP = 1 }; - -bool inactive_status(node_status_t status); +inline bool inactive_status(node_status_t status) +{ + return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE || + status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL); +} template class mip_node_t { diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index c38e98e27d..c92ab74fce 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -218,8 +218,10 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& nonbasic_list, const std::vector& fractional, + const csc_matrix_t& AT, basis_update_mpf_t& basis_factors, - pseudo_costs_t& pc) + std::vector& strong_branch_down, + std::vector& strong_branch_up) { i_t m = lp.num_rows; i_t n = lp.num_cols; @@ -246,7 +248,7 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t& lp, objective_change_estimate_t estimate = single_pivot_objective_change_estimate(lp, settings, - pc.AT, + AT, vstatus, j, basic_map[j], @@ -258,8 +260,8 @@ void initialize_pseudo_costs_with_estimate(const 
lp_problem_t& lp, workspace, delta_z, work_estimate); - pc.strong_branch_down[k] = estimate.down_obj_change; - pc.strong_branch_up[k] = estimate.up_obj_change; + strong_branch_down[k] = estimate.down_obj_change; + strong_branch_up[k] = estimate.up_obj_change; } } @@ -295,10 +297,15 @@ void strong_branch_helper(i_t start, const std::vector& root_soln, const std::vector& root_vstatus, const std::vector& edge_norms, + std::vector& strong_branch_down, + std::vector& strong_branch_up, + omp_atomic_t& num_strong_branches_completed) f_t root_obj, f_t upper_bound, i_t iter_limit, - pseudo_costs_t& pc, +std::vector& strong_branch_down, + std::vector& strong_branch_up, + omp_atomic_t& num_strong_branches_completed, std::vector& dual_simplex_obj_down, std::vector& dual_simplex_obj_up, std::vector& dual_simplex_status_down, @@ -380,7 +387,7 @@ void strong_branch_helper(i_t start, } if (branch == 0) { - pc.strong_branch_down[k] = std::max(obj - root_obj, 0.0); + strong_branch_down[k] = std::max(obj - root_obj, 0.0); dual_simplex_obj_down[k] = std::max(obj - root_obj, 0.0); dual_simplex_status_down[k] = status; if (verbose) { @@ -393,7 +400,7 @@ void strong_branch_helper(i_t start, toc(start_time)); } } else { - pc.strong_branch_up[k] = std::max(obj - root_obj, 0.0); + strong_branch_up[k] = std::max(obj - root_obj, 0.0); dual_simplex_obj_up[k] = std::max(obj - root_obj, 0.0); dual_simplex_status_up[k] = status; if (verbose) { @@ -431,7 +438,7 @@ void strong_branch_helper(i_t start, } if (toc(start_time) > settings.time_limit) { break; } - const i_t completed = pc.num_strong_branches_completed++; + const i_t completed = num_strong_branches_completed++; if (thread_id == 0 && toc(last_log) > 10) { last_log = tic(); @@ -463,7 +470,7 @@ std::pair trial_branching(const lp_problem_t& ori f_t upper_bound, f_t start_time, i_t iter_limit, - omp_atomic_t& total_lp_iter) + i_t& iter) { lp_problem_t child_problem = original_lp; child_problem.lower[branch_var] = branch_var_lower; @@ -479,7 
+486,7 @@ std::pair trial_branching(const lp_problem_t& ori objective_upper_bound(child_problem, upper_bound, child_settings.dual_tol); lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); - i_t iter = 0; + iter = 0; std::vector child_vstatus = vstatus; std::vector child_edge_norms = edge_norms; std::vector child_basic_list = basic_list; @@ -502,7 +509,7 @@ std::pair trial_branching(const lp_problem_t& ori solution, iter, child_edge_norms); - total_lp_iter += iter; + settings.log.debug("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %s. Obj %e\n", branch_var, child_problem.lower[branch_var], @@ -699,6 +706,10 @@ static void batch_pdlp_strong_branching_task( std::vector& pdlp_obj_up) { constexpr bool verbose = false; + pc.resize(original_lp.num_cols); + std::vector strong_branch_down(fractional.size(), 0); + std::vector strong_branch_up(fractional.size(), 0); + omp_atomic_t num_strong_branches_completed = 0; settings.log.printf(effective_batch_pdlp == 2 ? "Batch PDLP only for strong branching\n" @@ -1049,8 +1060,10 @@ void strong_branching(const lp_problem_t& original_lp, basic_list, nonbasic_list, fractional, + pc.AT, basis_factors, - pc); + strong_branch_down, + strong_branch_up); } else { #pragma omp parallel num_threads(settings.num_threads) { @@ -1105,7 +1118,9 @@ void strong_branching(const lp_problem_t& original_lp, root_obj, upper_bound, simplex_iteration_limit, - pc, + strong_branch_down, +strong_branch_up, +num_strong_branches_completed, dual_simplex_obj_down, dual_simplex_obj_up, dual_simplex_status_down, @@ -1226,28 +1241,27 @@ void strong_branching(const lp_problem_t& original_lp, } } - pc.update_pseudo_costs_from_strong_branching(fractional, root_solution.x); + pc.update_pseudo_costs_from_strong_branching( + fractional, strong_branch_down, strong_branch_up, root_solution.x); } -template -f_t pseudo_costs_t::calculate_pseudocost_score(i_t j, - const std::vector& solution, - f_t pseudo_cost_up_avg, - f_t 
pseudo_cost_down_avg) const +template +f_t pseudo_costs_t::calculate_pseudocost_score( + i_t j, const std::vector& solution, pseudo_cost_averages_t averages) const { constexpr f_t eps = 1e-6; i_t num_up = pseudo_cost_num_up[j]; i_t num_down = pseudo_cost_num_down[j]; - f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; - f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; + f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : averages.up_avg; + f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : averages.down_avg; f_t f_down = solution[j] - std::floor(solution[j]); f_t f_up = std::ceil(solution[j]) - solution[j]; return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); } -template -void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, - f_t leaf_objective) +template +void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, + f_t leaf_objective) { const f_t change_in_obj = std::max(leaf_objective - node_ptr->lower_bound, 0.0); const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN @@ -1263,43 +1277,54 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt } } -template -void pseudo_costs_t::initialized(i_t& num_initialized_down, - i_t& num_initialized_up, - f_t& pseudo_cost_down_avg, - f_t& pseudo_cost_up_avg) const +template +pseudo_cost_averages_t pseudo_costs_t::compute_averages() const { - auto avgs = compute_pseudo_cost_averages(pseudo_cost_sum_down.data(), - pseudo_cost_sum_up.data(), - pseudo_cost_num_down.data(), - pseudo_cost_num_up.data(), - pseudo_cost_sum_down.size()); - pseudo_cost_down_avg = avgs.down_avg; - pseudo_cost_up_avg = avgs.up_avg; + i_t num_initialized_down = 0; + i_t num_initialized_up = 0; + f_t pseudo_cost_down_avg = 0.0; + f_t pseudo_cost_up_avg = 0.0; + + for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { + if (pseudo_cost_num_down[j] > 0) { + ++num_initialized_down; + if 
(std::isfinite(pseudo_cost_sum_down[j])) { + pseudo_cost_down_avg += pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; + } + } + if (pseudo_cost_num_up[j] > 0) { + ++num_initialized_up; + if (std::isfinite(pseudo_cost_sum_up[j])) { + pseudo_cost_up_avg += pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; + } + } + } + + pseudo_cost_averages_t averages{ + .down_avg = (num_initialized_down > 0) ? pseudo_cost_down_avg / num_initialized_down : 1.0, + .num_init_down = num_initialized_down, + .up_avg = (num_initialized_up > 0) ? pseudo_cost_up_avg / num_initialized_up : 1.0, + .num_init_up = num_initialized_up}; + return averages; } -template -i_t pseudo_costs_t::variable_selection(const std::vector& fractional, - const std::vector& solution, - logger_t& log) +template +i_t pseudo_costs_t::variable_selection(const std::vector& fractional, + const std::vector& solution, + logger_t& log) { - i_t branch_var = fractional[0]; - f_t max_score = -1; - i_t num_initialized_down; - i_t num_initialized_up; - f_t pseudo_cost_down_avg; - f_t pseudo_cost_up_avg; - - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + i_t branch_var = fractional[0]; + f_t max_score = -1; + pseudo_cost_averages_t averages = compute_averages(); log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); + averages.num_init_down, + averages.num_init_up, + averages.down_avg, + averages.up_avg); for (i_t j : fractional) { - f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = calculate_pseudocost_score(j, solution, averages); if (score > max_score) { max_score = score; @@ -1315,8 +1340,8 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio return branch_var; } -template -i_t pseudo_costs_t::reliable_variable_selection( +template +i_t pseudo_costs_t::reliable_variable_selection( const 
mip_node_t* node_ptr, const std::vector& fractional, branch_and_bound_worker_t* worker, @@ -1329,12 +1354,11 @@ i_t pseudo_costs_t::reliable_variable_selection( const std::vector& new_slacks, const lp_problem_t& original_lp) { - constexpr f_t eps = 1e-6; - f_t start_time = bnb_stats.start_time; - i_t branch_var = fractional[0]; - f_t max_score = -1; - f_t pseudo_cost_down_avg = -1; - f_t pseudo_cost_up_avg = -1; + constexpr f_t eps = 1e-6; + f_t start_time = bnb_stats.start_time; + i_t branch_var = fractional[0]; + f_t max_score = -1; + pseudo_cost_averages_t averages; lp_solution_t& leaf_solution = worker->leaf_solution; const int64_t branch_and_bound_lp_iters = bnb_stats.total_lp_iters; @@ -1367,14 +1391,12 @@ i_t pseudo_costs_t::reliable_variable_selection( // In the latter, we are not using the average pseudocost (which calculated in the `initialized` // method). if (reliable_threshold == 0) { - i_t num_initialized_up; - i_t num_initialized_down; - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + averages = compute_averages(); log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); + averages.num_init_down, + averages.num_init_up, + averages.down_avg, + averages.up_avg); } std::vector> unreliable_list; @@ -1386,8 +1408,7 @@ i_t pseudo_costs_t::reliable_variable_selection( unreliable_list.push_back(std::make_pair(-1, j)); continue; } - f_t score = - calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); if (score > max_score) { max_score = score; @@ -1456,7 +1477,7 @@ i_t pseudo_costs_t::reliable_variable_selection( log.printf( "RB iters = %d, B&B iters = %d, unreliable = %d, num_tasks = %d, reliable_threshold = %d\n", - strong_branching_lp_iter.load(), + static_cast(strong_branching_lp_iter), 
branch_and_bound_lp_iters, unreliable_list.size(), num_tasks, @@ -1503,8 +1524,7 @@ i_t pseudo_costs_t::reliable_variable_selection( score = std::max(estimate.up_obj_change, eps) * std::max(estimate.down_obj_change, eps); } else { // Use the previous score, even if it is unreliable - score = calculate_pseudocost_score( - j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + score = calculate_pseudocost_score(j, leaf_solution.x, averages); } } } else { @@ -1596,6 +1616,7 @@ i_t pseudo_costs_t::reliable_variable_selection( pseudo_cost_mutex_down[j].lock(); if (pseudo_cost_num_down[j] < reliable_threshold) { // Do trial branching on the down branch + i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, @@ -1610,7 +1631,8 @@ i_t pseudo_costs_t::reliable_variable_selection( upper_bound, start_time, iter_limit_per_trial, - strong_branching_lp_iter); + iter); + strong_branching_lp_iter += iter; dual_simplex_obj_down[i] = obj; dual_simplex_status_down[i] = status; @@ -1639,6 +1661,7 @@ i_t pseudo_costs_t::reliable_variable_selection( } else { pseudo_cost_mutex_up[j].lock(); if (pseudo_cost_num_up[j] < reliable_threshold) { + i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, @@ -1653,7 +1676,8 @@ i_t pseudo_costs_t::reliable_variable_selection( upper_bound, start_time, iter_limit_per_trial, - strong_branching_lp_iter); + iter); + strong_branching_lp_iter += iter; dual_simplex_obj_up[i] = obj; dual_simplex_status_up[i] = status; @@ -1674,9 +1698,7 @@ i_t pseudo_costs_t::reliable_variable_selection( if (toc(start_time) > settings.time_limit) { continue; } - score = - calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); - + score = calculate_pseudocost_score(j, leaf_solution.x, averages); score_mutex.lock(); if (score > max_score) { max_score = score; @@ -1777,28 +1799,23 @@ i_t pseudo_costs_t::reliable_variable_selection( return 
branch_var; } -template -f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, - const std::vector& solution, - f_t lower_bound, - logger_t& log) +template +f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log) { const i_t num_fractional = fractional.size(); f_t estimate = lower_bound; - i_t num_initialized_down; - i_t num_initialized_up; - f_t pseudo_cost_down_avg; - f_t pseudo_cost_up_avg; - - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + pseudo_cost_averages_t averages = compute_averages(); for (i_t j : fractional) { constexpr f_t eps = 1e-6; i_t num_up = pseudo_cost_num_up[j]; i_t num_down = pseudo_cost_num_down[j]; - f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; - f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; + f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : averages.up_avg; + f_t pc_down = num_down > 0 ? 
pseudo_cost_sum_down[j] / num_down : averages.down_avg; f_t f_down = solution[j] - std::floor(solution[j]); f_t f_up = std::ceil(solution[j]) - solution[j]; estimate += std::min(pc_down * f_down, pc_up * f_up); @@ -1808,9 +1825,12 @@ f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, return estimate; } -template -void pseudo_costs_t::update_pseudo_costs_from_strong_branching( - const std::vector& fractional, const std::vector& root_soln) +template +void pseudo_costs_t::update_pseudo_costs_from_strong_branching( + const std::vector& fractional, + const std::vector& strong_branch_down, + const std::vector& strong_branch_up, + const std::vector& root_soln) { for (i_t k = 0; k < fractional.size(); k++) { const i_t j = fractional[k]; @@ -1834,7 +1854,9 @@ void pseudo_costs_t::update_pseudo_costs_from_strong_branching( #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE -template class pseudo_costs_t; +template class pseudo_costs_t; +template class pseudo_costs_t; +template class pseudo_cost_snapshot_t; template void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 37a4b75039..d980cd2767 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -27,354 +27,6 @@ namespace cuopt::linear_programming::dual_simplex { -template -struct branch_variable_t { - i_t variable; - rounding_direction_t direction; -}; - -template -struct pseudo_cost_update_t { - i_t variable; - rounding_direction_t direction; - f_t delta; - double work_timestamp; - int worker_id; - - bool operator<(const pseudo_cost_update_t& other) const - { - if (work_timestamp != other.work_timestamp) return work_timestamp < other.work_timestamp; - if (variable != other.variable) return variable < other.variable; - if (delta != other.delta) return delta < other.delta; - return worker_id < other.worker_id; - } -}; - 
-template -struct pseudo_cost_averages_t { - f_t down_avg; - f_t up_avg; -}; - -// used to get T from omp_atomic_t based on the fact that omp_atomic_t::operator++ returns T -template -using underlying_type = decltype(std::declval()++); - -// Necessary because omp_atomic_t may be passed instead of f_t -template -auto compute_pseudo_cost_averages(const MaybeWrappedF* pc_sum_down, - const MaybeWrappedF* pc_sum_up, - const MaybeWrappedI* pc_num_down, - const MaybeWrappedI* pc_num_up, - size_t n) -{ - using underlying_f_t = underlying_type; - using underlying_i_t = underlying_type; - - underlying_i_t num_initialized_down = 0; - underlying_i_t num_initialized_up = 0; - underlying_f_t pseudo_cost_down_avg = 0.0; - underlying_f_t pseudo_cost_up_avg = 0.0; - - for (size_t j = 0; j < n; ++j) { - if (pc_num_down[j] > 0) { - ++num_initialized_down; - if (std::isfinite(pc_sum_down[j])) { - pseudo_cost_down_avg += pc_sum_down[j] / pc_num_down[j]; - } - } - if (pc_num_up[j] > 0) { - ++num_initialized_up; - if (std::isfinite(pc_sum_up[j])) { pseudo_cost_up_avg += pc_sum_up[j] / pc_num_up[j]; } - } - } - - pseudo_cost_down_avg = - (num_initialized_down > 0) ? pseudo_cost_down_avg / num_initialized_down : 1.0; - pseudo_cost_up_avg = (num_initialized_up > 0) ? 
pseudo_cost_up_avg / num_initialized_up : 1.0; - - return pseudo_cost_averages_t{pseudo_cost_down_avg, pseudo_cost_up_avg}; -} - -// Variable selection using pseudo-cost product scoring -// Returns the best variable to branch on -template -i_t variable_selection_from_pseudo_costs(const f_t* pc_sum_down, - const f_t* pc_sum_up, - const i_t* pc_num_down, - const i_t* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution) -{ - const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return -1; - - auto [pc_down_avg, pc_up_avg] = - compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t pc_down = pc_num_down[j] != 0 ? pc_sum_down[j] / pc_num_down[j] : pc_down_avg; - f_t pc_up = pc_num_up[j] != 0 ? pc_sum_up[j] / pc_num_up[j] : pc_up_avg; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); - if (score > max_score) { - max_score = score; - branch_var = j; - } - } - - return branch_var; -} - -// Objective estimate using pseudo-costs (lock-free implementation) -// Returns lower_bound + estimated cost to reach integer feasibility -template -f_t obj_estimate_from_arrays(const f_t* pc_sum_down, - const f_t* pc_sum_up, - const i_t* pc_num_down, - const i_t* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution, - f_t lower_bound) -{ - auto [pc_down_avg, pc_up_avg] = - compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - f_t estimate = lower_bound; - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t pc_down = pc_num_down[j] != 0 ? pc_sum_down[j] / pc_num_down[j] : pc_down_avg; - f_t pc_up = pc_num_up[j] != 0 ? 
pc_sum_up[j] / pc_num_up[j] : pc_up_avg; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - estimate += std::min(std::max(pc_down * f_down, eps), std::max(pc_up * f_up, eps)); - } - - return estimate; -} - -template -branch_variable_t pseudocost_diving_from_arrays(const MaybeWrappedF* pc_sum_down, - const MaybeWrappedF* pc_sum_up, - const MaybeWrappedI* pc_num_down, - const MaybeWrappedI* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution, - const std::vector& root_solution) -{ - const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; - - auto avgs = compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - rounding_direction_t round_dir = rounding_direction_t::DOWN; - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t pc_down = pc_num_down[j] != 0 ? (f_t)pc_sum_down[j] / (f_t)pc_num_down[j] : avgs.down_avg; - f_t pc_up = pc_num_up[j] != 0 ? (f_t)pc_sum_up[j] / (f_t)pc_num_up[j] : avgs.up_avg; - - f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); - f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); - - f_t score = 0; - rounding_direction_t dir = rounding_direction_t::DOWN; - - f_t root_val = (j < static_cast(root_solution.size())) ? 
root_solution[j] : solution[j]; - - if (solution[j] < root_val - f_t(0.4)) { - score = score_down; - dir = rounding_direction_t::DOWN; - } else if (solution[j] > root_val + f_t(0.4)) { - score = score_up; - dir = rounding_direction_t::UP; - } else if (f_down < f_t(0.3)) { - score = score_down; - dir = rounding_direction_t::DOWN; - } else if (f_down > f_t(0.7)) { - score = score_up; - dir = rounding_direction_t::UP; - } else if (pc_down < pc_up + eps) { - score = score_down; - dir = rounding_direction_t::DOWN; - } else { - score = score_up; - dir = rounding_direction_t::UP; - } - - if (score > max_score) { - max_score = score; - branch_var = j; - round_dir = dir; - } - } - - if (round_dir == rounding_direction_t::NONE) { - branch_var = fractional[0]; - round_dir = rounding_direction_t::DOWN; - } - - return {branch_var, round_dir}; -} - -template -branch_variable_t guided_diving_from_arrays(const MaybeWrappedF* pc_sum_down, - const MaybeWrappedF* pc_sum_up, - const MaybeWrappedI* pc_num_down, - const MaybeWrappedI* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution, - const std::vector& incumbent) -{ - const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; - - auto avgs = compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - rounding_direction_t round_dir = rounding_direction_t::DOWN; - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); - f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); - rounding_direction_t dir = - down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP; - - f_t pc_down = pc_num_down[j] != 0 ? 
(f_t)pc_sum_down[j] / (f_t)pc_num_down[j] : avgs.down_avg; - f_t pc_up = pc_num_up[j] != 0 ? (f_t)pc_sum_up[j] / (f_t)pc_num_up[j] : avgs.up_avg; - - f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; - f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; - f_t score = (score1 + score2) / 6; - - if (score > max_score) { - max_score = score; - branch_var = j; - round_dir = dir; - } - } - - return {branch_var, round_dir}; -} - -template -class pseudo_cost_snapshot_t { - public: - pseudo_cost_snapshot_t() = default; - - pseudo_cost_snapshot_t(std::vector sum_down, - std::vector sum_up, - std::vector num_down, - std::vector num_up) - : sum_down_(std::move(sum_down)), - sum_up_(std::move(sum_up)), - num_down_(std::move(num_down)), - num_up_(std::move(num_up)) - { - } - - i_t variable_selection(const std::vector& fractional, const std::vector& solution) const - { - return variable_selection_from_pseudo_costs(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution); - } - - f_t obj_estimate(const std::vector& fractional, - const std::vector& solution, - f_t lower_bound) const - { - return obj_estimate_from_arrays(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution, - lower_bound); - } - - branch_variable_t pseudocost_diving(const std::vector& fractional, - const std::vector& solution, - const std::vector& root_solution) const - { - return pseudocost_diving_from_arrays(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution, - root_solution); - } - - branch_variable_t guided_diving(const std::vector& fractional, - const std::vector& solution, - const std::vector& incumbent) const - { - return guided_diving_from_arrays(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution, - 
incumbent); - } - - void queue_update( - i_t variable, rounding_direction_t direction, f_t delta, double clock, int worker_id) - { - updates_.push_back({variable, direction, delta, clock, worker_id}); - if (direction == rounding_direction_t::DOWN) { - sum_down_[variable] += delta; - num_down_[variable]++; - } else { - sum_up_[variable] += delta; - num_up_[variable]++; - } - } - - std::vector> take_updates() - { - std::vector> result; - result.swap(updates_); - return result; - } - - i_t n_vars() const { return (i_t)sum_down_.size(); } - - std::vector sum_down_; - std::vector sum_up_; - std::vector num_down_; - std::vector num_up_; - - private: - std::vector> updates_; -}; - template struct reliability_branching_settings_t { // Lower bound for the maximum number of LP iterations for a single trial branching @@ -413,6 +65,13 @@ struct reliability_branching_settings_t { bool rank_candidates_with_dual_pivot = true; }; +template +struct branch_variable_t { + i_t variable; + rounding_direction_t direction; +}; + + template struct batch_pdlp_warm_cache_t { const raft::handle_t batch_pdlp_handle{}; @@ -426,8 +85,58 @@ struct batch_pdlp_warm_cache_t { }; template +struct pseudo_cost_averages_t { + f_t down_avg; + i_t num_init_down; + f_t up_avg; + i_t num_init_up; +}; + +template +struct pseudo_cost_update_t { + i_t variable; + rounding_direction_t direction; + f_t delta; + double work_timestamp; + int worker_id; + + bool operator<(const pseudo_cost_update_t& other) const + { + if (work_timestamp != other.work_timestamp) return work_timestamp < other.work_timestamp; + if (variable != other.variable) return variable < other.variable; + if (delta != other.delta) return delta < other.delta; + return worker_id < other.worker_id; + } +}; + +// `BnBMode` specify how we control the memory accesses: +// - If `BnBMode == branch_and_bound_mode_t::REGULAR`, then we assume that this object is shared +// among the B&B threads, and thus, require atomics and mutexes to avoid data races. 
+// - If `BnBMode == branch_and_bound_mode_t::DETERMINISTIC`, then each thread has it own pseudocost +// snapshot, hence, we can disable all atomics and mutexes. +// `BnBMode` is automatically set depending if it is a `pseudo_costs_t` (REGULAR) +// or a `pseudo_costs_snapshot_t` (DETERMINISTIC). +template class pseudo_costs_t { public: + // Define the types used for storing the pseudocost of each variable. + // Disable or enable atomics depending on if we are in REGULAR or DETERMINISTIC modes + using float_type = + std::conditional_t, f_t>; + + using int_type = + std::conditional_t, i_t>; + + // Counting the number of LP iterations might require more than an int32 can hold. + using int64_type = + std::conditional_t, int64_t>; + + // Disable or enable mutexes depending on if we are in REGULAR or DETERMINISTIC modes + using mutex_type = + std::conditional_t; + explicit pseudo_costs_t(i_t num_variables) : pseudo_cost_sum_down(num_variables), pseudo_cost_sum_up(num_variables), @@ -441,21 +150,6 @@ class pseudo_costs_t { void update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective); - pseudo_cost_snapshot_t create_snapshot() const - { - const i_t n = (i_t)pseudo_cost_sum_down.size(); - std::vector sd(n), su(n); - std::vector nd(n), nu(n); - for (i_t j = 0; j < n; ++j) { - sd[j] = pseudo_cost_sum_down[j]; - su[j] = pseudo_cost_sum_up[j]; - nd[j] = pseudo_cost_num_down[j]; - nu[j] = pseudo_cost_num_up[j]; - } - return pseudo_cost_snapshot_t( - std::move(sd), std::move(su), std::move(nd), std::move(nu)); - } - void merge_updates(const std::vector>& updates) { for (const auto& upd : updates) { @@ -479,10 +173,7 @@ class pseudo_costs_t { pseudo_cost_mutex_down.resize(num_variables); } - void initialized(i_t& num_initialized_down, - i_t& num_initialized_up, - f_t& pseudo_cost_down_avg, - f_t& pseudo_cost_up_avg) const; + pseudo_cost_averages_t compute_averages() const; f_t obj_estimate(const std::vector& fractional, const std::vector& solution, @@ -506,6 +197,8 @@ class 
pseudo_costs_t { const lp_problem_t& original_lp); void update_pseudo_costs_from_strong_branching(const std::vector& fractional, + const std::vector& strong_branch_down, + const std::vector& strong_branch_up, const std::vector& root_soln); uint32_t compute_state_hash() const @@ -514,33 +207,96 @@ class pseudo_costs_t { detail::compute_hash(pseudo_cost_num_down) ^ detail::compute_hash(pseudo_cost_num_up); } - uint32_t compute_strong_branch_hash() const - { - return detail::compute_hash(strong_branch_down) ^ detail::compute_hash(strong_branch_up); - } - f_t calculate_pseudocost_score(i_t j, const std::vector& solution, - f_t pseudo_cost_up_avg, - f_t pseudo_cost_down_avg) const; + pseudo_cost_averages_t averages) const; reliability_branching_settings_t reliability_branching_settings; csc_matrix_t AT; // Transpose of the constraint matrix A - std::vector> pseudo_cost_sum_up; - std::vector> pseudo_cost_sum_down; - std::vector> pseudo_cost_num_up; - std::vector> pseudo_cost_num_down; - std::vector strong_branch_down; - std::vector strong_branch_up; - std::vector pseudo_cost_mutex_up; - std::vector pseudo_cost_mutex_down; - omp_atomic_t num_strong_branches_completed = 0; - omp_atomic_t strong_branching_lp_iter = 0; + std::vector pseudo_cost_sum_up; + std::vector pseudo_cost_sum_down; + std::vector pseudo_cost_num_up; + std::vector pseudo_cost_num_down; + std::vector pseudo_cost_mutex_up; + std::vector pseudo_cost_mutex_down; + int64_type strong_branching_lp_iter = 0; batch_pdlp_warm_cache_t pdlp_warm_cache; }; +template +class pseudo_cost_snapshot_t : public pseudo_costs_t { + public: + using Base = pseudo_costs_t; + + pseudo_cost_snapshot_t(i_t num_variables) : Base(num_variables) {}; + + pseudo_cost_snapshot_t(const pseudo_costs_t& other) + : Base(1) + { + *this = other; + } + + pseudo_cost_snapshot_t(const Base& other) : Base(1) { *this = other; } + pseudo_cost_snapshot_t& operator=( + const pseudo_costs_t& other) + { + i_t n = other.pseudo_cost_num_down.size(); + 
Base::pseudo_cost_num_down.resize(n); + Base::pseudo_cost_num_up.resize(n); + Base::pseudo_cost_sum_down.resize(n); + Base::pseudo_cost_sum_up.resize(n); + + for (i_t i = 0; i < n; ++i) { + Base::pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].get_no_atomic(); + Base::pseudo_cost_num_up[i] = other.pseudo_cost_num_up[i].get_no_atomic(); + Base::pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].get_no_atomic(); + Base::pseudo_cost_sum_up[i] = other.pseudo_cost_sum_up[i].get_no_atomic(); + } + + return *this; + } + + pseudo_cost_snapshot_t& operator=(const Base& other) + { + if (this != &other) { + Base::pseudo_cost_num_down = other.pseudo_cost_num_down; + Base::pseudo_cost_num_up = other.pseudo_cost_num_up; + Base::pseudo_cost_sum_down = other.pseudo_cost_sum_down; + Base::pseudo_cost_sum_up = other.pseudo_cost_sum_up; + } + return *this; + }; + + void queue_update( + i_t variable, rounding_direction_t direction, f_t delta, double clock, int worker_id) + { + updates_.push_back({variable, direction, delta, clock, worker_id}); + if (direction == rounding_direction_t::DOWN) { + Base::pseudo_cost_sum_down[variable] += delta; + ++Base::pseudo_cost_num_down[variable]; + } else { + Base::pseudo_cost_sum_up[variable] += delta; + ++Base::pseudo_cost_num_up[variable]; + } + } + + std::vector> take_updates() + { + std::vector> result; + result.swap(updates_); + return result; + } + + i_t n_vars() const { return Base::pseudo_cost_sum_down.size(); } + + private: + std::vector> updates_; +}; + template void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 8dcb7a71a5..6b51e45c23 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -7,36 +7,19 @@ #pragma once +#include #include #include #include -#include #include -#include #include -#include #include namespace 
cuopt::linear_programming::dual_simplex { -constexpr int num_search_strategies = 5; - -// Indicate the search and variable selection algorithms used by each thread -// in B&B (See [1]). -// -// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, -// Berlin, 2007. doi: 10.14279/depositonce-1634. -enum search_strategy_t : int { - BEST_FIRST = 0, // Best-First + Plunging. - PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) - LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) - GUIDED_DIVING = 3, // Guided diving (9.2.3). - COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) -}; - template struct branch_and_bound_stats_t { f_t start_time = 0.0; diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index f6e66472dd..8890a7487a 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -54,6 +54,15 @@ class omp_mutex_t { std::unique_ptr mutex; }; +// Empty class with the same methods as `omp_mutex_t`. This is mainly used for cleanly disabling +// the `omp_mutex_t` via type alias (`lock` and `unlock` are replaced by NOOPs). +class fake_omp_mutex_t { + public: + static void lock() {} + static void unlock() {} + static bool try_lock() { return true; } +}; + // Wrapper for omp atomic operations. See // https://www.openmp.org/spec-html/5.1/openmpsu105.html. 
template @@ -117,6 +126,11 @@ class omp_atomic_t { T fetch_sub(T inc) { return fetch_add(-inc); } + // Get the underlying value without atomics + T& get_no_atomic() { return val; } + + T get_no_atomic() const { return val; } + private: T val; From a517f13167b4d815eb6ddf6569d220259cc63f00 Mon Sep 17 00:00:00 2001 From: Nicolas Guidotti Date: Tue, 7 Apr 2026 15:07:50 +0200 Subject: [PATCH 04/53] fixed compilation Signed-off-by: Nicolas Guidotti --- cpp/src/branch_and_bound/CMakeLists.txt | 1 - cpp/src/branch_and_bound/constants.hpp | 2 +- cpp/src/branch_and_bound/mip_node.cpp | 18 --- cpp/src/branch_and_bound/pseudo_costs.cpp | 155 +++++++++++----------- cpp/src/branch_and_bound/pseudo_costs.hpp | 26 ++-- cpp/src/branch_and_bound/worker.hpp | 7 +- 6 files changed, 92 insertions(+), 117 deletions(-) delete mode 100644 cpp/src/branch_and_bound/mip_node.cpp diff --git a/cpp/src/branch_and_bound/CMakeLists.txt b/cpp/src/branch_and_bound/CMakeLists.txt index 5bb1017120..1e40c1bbf1 100644 --- a/cpp/src/branch_and_bound/CMakeLists.txt +++ b/cpp/src/branch_and_bound/CMakeLists.txt @@ -5,7 +5,6 @@ set(BRANCH_AND_BOUND_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/mip_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pseudo_costs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/diving_heuristics.cpp ) diff --git a/cpp/src/branch_and_bound/constants.hpp b/cpp/src/branch_and_bound/constants.hpp index bad176b3d2..ab8677095c 100644 --- a/cpp/src/branch_and_bound/constants.hpp +++ b/cpp/src/branch_and_bound/constants.hpp @@ -26,6 +26,6 @@ enum search_strategy_t : int { enum class rounding_direction_t { NONE = -1, DOWN = 0, UP = 1 }; -enum class branch_and_bound_mode_t { REGULAR = 0, DETERMINISTIC = 1 }; +enum class branch_and_bound_mode_t { PARALLEL = 0, DETERMINISTIC = 1 }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/mip_node.cpp b/cpp/src/branch_and_bound/mip_node.cpp deleted file mode 100644 index 
7b0f644f4e..0000000000 --- a/cpp/src/branch_and_bound/mip_node.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* clang-format off */ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ -/* clang-format on */ - -#include - -namespace cuopt::linear_programming::dual_simplex { - -bool inactive_status(node_status_t status) -{ - return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE || - status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL); -} - -} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index c92ab74fce..a7e38ed9cd 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -297,20 +297,17 @@ void strong_branch_helper(i_t start, const std::vector& root_soln, const std::vector& root_vstatus, const std::vector& edge_norms, - std::vector& strong_branch_down, - std::vector& strong_branch_up, - omp_atomic_t& num_strong_branches_completed) f_t root_obj, f_t upper_bound, i_t iter_limit, -std::vector& strong_branch_down, + std::vector& strong_branch_down, std::vector& strong_branch_up, - omp_atomic_t& num_strong_branches_completed, std::vector& dual_simplex_obj_down, std::vector& dual_simplex_obj_up, std::vector& dual_simplex_status_down, std::vector& dual_simplex_status_up, - shared_strong_branching_context_view_t& sb_view) + shared_strong_branching_context_view_t& sb_view, + omp_atomic_t& num_strong_branches_completed) { raft::common::nvtx::range scope("BB::strong_branch_helper"); lp_problem_t child_problem = original_lp; @@ -387,7 +384,7 @@ std::vector& strong_branch_down, } if (branch == 0) { - strong_branch_down[k] = std::max(obj - root_obj, 0.0); + strong_branch_down[k] = std::max(obj - root_obj, 0.0); dual_simplex_obj_down[k] = std::max(obj - root_obj, 0.0); 
dual_simplex_status_down[k] = status; if (verbose) { @@ -400,7 +397,7 @@ std::vector& strong_branch_down, toc(start_time)); } } else { - strong_branch_up[k] = std::max(obj - root_obj, 0.0); + strong_branch_up[k] = std::max(obj - root_obj, 0.0); dual_simplex_obj_up[k] = std::max(obj - root_obj, 0.0); dual_simplex_status_up[k] = status; if (verbose) { @@ -470,7 +467,7 @@ std::pair trial_branching(const lp_problem_t& ori f_t upper_bound, f_t start_time, i_t iter_limit, - i_t& iter) + i_t& iter) { lp_problem_t child_problem = original_lp; child_problem.lower[branch_var] = branch_var_lower; @@ -706,10 +703,6 @@ static void batch_pdlp_strong_branching_task( std::vector& pdlp_obj_up) { constexpr bool verbose = false; - pc.resize(original_lp.num_cols); - std::vector strong_branch_down(fractional.size(), 0); - std::vector strong_branch_up(fractional.size(), 0); - omp_atomic_t num_strong_branches_completed = 0; settings.log.printf(effective_batch_pdlp == 2 ? "Batch PDLP only for strong branching\n" @@ -743,9 +736,9 @@ static void batch_pdlp_strong_branching_task( std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); if (warm_start_remaining_time <= 0.0) { return; } - assert(!pc.pdlp_warm_cache.populated && "PDLP warm cache should not be populated at this point"); + assert(!pc.pdlp_warm_cache->populated && "PDLP warm cache should not be populated at this point"); - if (!pc.pdlp_warm_cache.populated) { + if (!pc.pdlp_warm_cache->populated) { pdlp_solver_settings_t ws_settings; ws_settings.method = method_t::PDLP; ws_settings.presolver = presolver_t::None; @@ -767,14 +760,14 @@ static void batch_pdlp_strong_branching_task( ws_settings.inside_mip = true; if (effective_batch_pdlp == 1) { ws_settings.concurrent_halt = &concurrent_halt; } - auto start_time = std::chrono::high_resolution_clock::now(); + auto lp_start_time = std::chrono::high_resolution_clock::now(); - auto ws_solution = solve_lp(&pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, ws_settings); + auto 
ws_solution = solve_lp(&pc.pdlp_warm_cache->batch_pdlp_handle, mps_model, ws_settings); if (verbose) { auto end_time = std::chrono::high_resolution_clock::now(); auto duration = - std::chrono::duration_cast(end_time - start_time).count(); + std::chrono::duration_cast(end_time - lp_start_time).count(); settings.log.printf( "Original problem solved in %d milliseconds" " and iterations: %d\n", @@ -788,21 +781,21 @@ static void batch_pdlp_strong_branching_task( const auto& ws_dual = ws_solution.get_dual_solution(); // Need to use the pc steam since the batch pdlp handle will get destroyed after the warm // start - cache.initial_primal = rmm::device_uvector(ws_primal, ws_primal.stream()); - cache.initial_dual = rmm::device_uvector(ws_dual, ws_dual.stream()); - cache.step_size = ws_solution.get_pdlp_warm_start_data().initial_step_size_; - cache.primal_weight = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_; - cache.pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_; - cache.populated = true; + cache->initial_primal = rmm::device_uvector(ws_primal, ws_primal.stream()); + cache->initial_dual = rmm::device_uvector(ws_dual, ws_dual.stream()); + cache->step_size = ws_solution.get_pdlp_warm_start_data().initial_step_size_; + cache->primal_weight = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_; + cache->pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_; + cache->populated = true; if (verbose) { settings.log.printf( "Cached PDLP warm start: primal=%zu dual=%zu step_size=%e primal_weight=%e iters=%d\n", - cache.initial_primal.size(), - cache.initial_dual.size(), - cache.step_size, - cache.primal_weight, - cache.pdlp_iteration); + cache->initial_primal.size(), + cache->initial_dual.size(), + cache->step_size, + cache->primal_weight, + cache->pdlp_iteration); } } else { if (verbose) { @@ -828,22 +821,23 @@ static void batch_pdlp_strong_branching_task( if (batch_remaining_time <= 0.0) { return; } 
pdlp_settings.time_limit = batch_remaining_time; - if (pc.pdlp_warm_cache.populated) { + if (pc.pdlp_warm_cache->populated) { auto& cache = pc.pdlp_warm_cache; - pdlp_settings.set_initial_primal_solution(cache.initial_primal.data(), - cache.initial_primal.size(), - cache.batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_dual_solution( - cache.initial_dual.data(), cache.initial_dual.size(), cache.batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_step_size(cache.step_size); - pdlp_settings.set_initial_primal_weight(cache.primal_weight); - pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + pdlp_settings.set_initial_primal_solution(cache->initial_primal.data(), + cache->initial_primal.size(), + cache->batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution(cache->initial_dual.data(), + cache->initial_dual.size(), + cache->batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(cache->step_size); + pdlp_settings.set_initial_primal_weight(cache->primal_weight); + pdlp_settings.set_initial_pdlp_iteration(cache->pdlp_iteration); } if (concurrent_halt.load() == 1) { return; } const auto solutions = batch_pdlp_solve( - &pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); + &pc.pdlp_warm_cache->batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); f_t batch_pdlp_strong_branching_time = toc(start_batch); // Fail safe in case the batch PDLP failed and produced no solutions @@ -899,7 +893,7 @@ static void batch_pdlp_reliability_branching_task( const std::vector& candidate_vars, const simplex_solver_settings_t& settings, shared_strong_branching_context_view_t& sb_view, - batch_pdlp_warm_cache_t& pdlp_warm_cache, + batch_pdlp_warm_cache_t* pdlp_warm_cache, std::vector& pdlp_obj_down, std::vector& pdlp_obj_up) { @@ -946,15 +940,16 @@ static void batch_pdlp_reliability_branching_task( } pdlp_settings.time_limit = batch_remaining_time; - if 
(pdlp_warm_cache.populated) { - auto& cache = pdlp_warm_cache; - pdlp_settings.set_initial_primal_solution( - cache.initial_primal.data(), cache.initial_primal.size(), batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_dual_solution( - cache.initial_dual.data(), cache.initial_dual.size(), batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_step_size(cache.step_size); - pdlp_settings.set_initial_primal_weight(cache.primal_weight); - pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + if (pdlp_warm_cache->populated) { + pdlp_settings.set_initial_primal_solution(pdlp_warm_cache->initial_primal.data(), + pdlp_warm_cache->initial_primal.size(), + batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution(pdlp_warm_cache->initial_dual.data(), + pdlp_warm_cache->initial_dual.size(), + batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(pdlp_warm_cache->step_size); + pdlp_settings.set_initial_primal_weight(pdlp_warm_cache->primal_weight); + pdlp_settings.set_initial_pdlp_iteration(pdlp_warm_cache->pdlp_iteration); } if (concurrent_halt.load() == 1) { return; } @@ -1013,9 +1008,9 @@ void strong_branching(const lp_problem_t& original_lp, constexpr bool verbose = false; pc.resize(original_lp.num_cols); - pc.strong_branch_down.assign(fractional.size(), 0); - pc.strong_branch_up.assign(fractional.size(), 0); - pc.num_strong_branches_completed = 0; + std::vector strong_branch_down(fractional.size(), std::numeric_limits::quiet_NaN()); + std::vector strong_branch_up(fractional.size(), std::numeric_limits::quiet_NaN()); + omp_atomic_t num_strong_branches_completed = 0; const f_t elapsed_time = toc(start_time); if (elapsed_time > settings.time_limit) { return; } @@ -1060,10 +1055,10 @@ void strong_branching(const lp_problem_t& original_lp, basic_list, nonbasic_list, fractional, - pc.AT, + pc.AT, basis_factors, - strong_branch_down, - strong_branch_up); + strong_branch_down, + strong_branch_up); } else { #pragma omp 
parallel num_threads(settings.num_threads) { @@ -1095,7 +1090,6 @@ void strong_branching(const lp_problem_t& original_lp, i_t start = std::floor(k * fractional.size() / n); i_t end = std::floor((k + 1) * fractional.size() / n); - constexpr bool verbose = false; if (verbose) { settings.log.printf("Thread id %d task id %d start %d end %d. size %d\n", omp_get_thread_num(), @@ -1119,13 +1113,13 @@ void strong_branching(const lp_problem_t& original_lp, upper_bound, simplex_iteration_limit, strong_branch_down, -strong_branch_up, -num_strong_branches_completed, + strong_branch_up, dual_simplex_obj_down, dual_simplex_obj_up, dual_simplex_status_down, dual_simplex_status_up, - sb_view); + sb_view, + num_strong_branches_completed); } // DS done: signal PDLP to stop (time-limit or all work done) and wait if (effective_batch_pdlp == 1) { concurrent_halt.store(1); } @@ -1193,7 +1187,7 @@ num_strong_branches_completed, for (i_t k = 0; k < fractional.size(); k++) { for (i_t branch = 0; branch < 2; branch++) { const bool is_down = (branch == 0); - f_t& sb_dest = is_down ? pc.strong_branch_down[k] : pc.strong_branch_up[k]; + f_t& sb_dest = is_down ? strong_branch_down[k] : strong_branch_up[k]; f_t ds_obj = is_down ? dual_simplex_obj_down[k] : dual_simplex_obj_up[k]; dual::status_t ds_status = is_down ? dual_simplex_status_down[k] : dual_simplex_status_up[k]; @@ -1226,12 +1220,12 @@ num_strong_branches_completed, } } - pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root = + pc.pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root = (f_t(merged_from_pdlp) / f_t(fractional.size() * 2)) * 100.0; if (verbose) { settings.log.printf( "Batch PDLP for strong branching. 
Percent solved by batch PDLP at root: %f\n", - pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root); + pc.pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root); settings.log.printf( "Merged results: %d from DS, %d from PDLP, %d unresolved (NaN), %d solved by both\n", merged_from_ds, @@ -1251,9 +1245,11 @@ f_t pseudo_costs_t::calculate_pseudocost_score( { constexpr f_t eps = 1e-6; i_t num_up = pseudo_cost_num_up[j]; + i_t sum_up = pseudo_cost_sum_up[j]; i_t num_down = pseudo_cost_num_down[j]; - f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : averages.up_avg; - f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : averages.down_avg; + i_t sum_down = pseudo_cost_sum_down[j]; + f_t pc_up = num_up > 0 ? sum_up / num_up : averages.up_avg; + f_t pc_down = num_down > 0 ? sum_down / num_down : averages.down_avg; f_t f_down = solution[j] - std::floor(solution[j]); f_t f_up = std::ceil(solution[j]) - solution[j]; return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); @@ -1437,12 +1433,12 @@ i_t pseudo_costs_t::reliable_variable_selection( constexpr f_t min_percent_solved_by_batch_pdlp_at_root_for_pdlp = 5.0; // Batch PDLP is either forced or we use the heuristic to decide if it should be used const bool use_pdlp = (rb_mode == 2) || (rb_mode != 0 && !settings.sub_mip && - !settings.deterministic && pdlp_warm_cache.populated && + !settings.deterministic && pdlp_warm_cache->populated && unreliable_list.size() > min_num_candidates_for_pdlp && - pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root > + pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root > min_percent_solved_by_batch_pdlp_at_root_for_pdlp); - if (rb_mode != 0 && !pdlp_warm_cache.populated) { + if (rb_mode != 0 && !pdlp_warm_cache->populated) { log.printf("PDLP warm start data not populated, using DS only\n"); } else if (rb_mode != 0 && settings.sub_mip) { log.printf("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n"); @@ -1451,7 +1447,7 @@ i_t 
pseudo_costs_t::reliable_variable_selection( "Batch PDLP reliability branching is disabled because deterministic mode is enabled\n"); } else if (rb_mode != 0 && unreliable_list.size() < min_num_candidates_for_pdlp) { log.printf("Not enough candidates to use batch PDLP, using DS only\n"); - } else if (rb_mode != 0 && pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root < 5.0) { + } else if (rb_mode != 0 && pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root < 5.0) { log.printf("Percent solved by batch PDLP at root is too low, using DS only\n"); } else if (use_pdlp) { log.printf( @@ -1459,7 +1455,7 @@ i_t pseudo_costs_t::reliable_variable_selection( "by batch PDLP at root is %f%% (> %f%%)\n", static_cast(unreliable_list.size()), min_num_candidates_for_pdlp, - pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root, + pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root, min_percent_solved_by_batch_pdlp_at_root_for_pdlp); } @@ -1574,7 +1570,7 @@ i_t pseudo_costs_t::reliable_variable_selection( candidate_vars, settings, sb_view, - pdlp_warm_cache, + pdlp_warm_cache.get(), pdlp_obj_down, pdlp_obj_up); } @@ -1616,7 +1612,7 @@ i_t pseudo_costs_t::reliable_variable_selection( pseudo_cost_mutex_down[j].lock(); if (pseudo_cost_num_down[j] < reliable_threshold) { // Do trial branching on the down branch - i_t iter = 0; + i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, @@ -1631,8 +1627,8 @@ i_t pseudo_costs_t::reliable_variable_selection( upper_bound, start_time, iter_limit_per_trial, - iter); - strong_branching_lp_iter += iter; + iter); + strong_branching_lp_iter += iter; dual_simplex_obj_down[i] = obj; dual_simplex_status_down[i] = status; @@ -1661,7 +1657,7 @@ i_t pseudo_costs_t::reliable_variable_selection( } else { pseudo_cost_mutex_up[j].lock(); if (pseudo_cost_num_up[j] < reliable_threshold) { - i_t iter = 0; + i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, @@ 
-1676,8 +1672,8 @@ i_t pseudo_costs_t::reliable_variable_selection( upper_bound, start_time, iter_limit_per_trial, - iter); - strong_branching_lp_iter += iter; + iter); + strong_branching_lp_iter += iter; dual_simplex_obj_up[i] = obj; dual_simplex_status_up[i] = status; @@ -1698,7 +1694,7 @@ i_t pseudo_costs_t::reliable_variable_selection( if (toc(start_time) > settings.time_limit) { continue; } - score = calculate_pseudocost_score(j, leaf_solution.x, averages); + score = calculate_pseudocost_score(j, leaf_solution.x, averages); score_mutex.lock(); if (score > max_score) { max_score = score; @@ -1778,8 +1774,7 @@ i_t pseudo_costs_t::reliable_variable_selection( } } - f_t score = - calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); if (score > max_score) { max_score = score; branch_var = j; @@ -1854,7 +1849,7 @@ void pseudo_costs_t::update_pseudo_costs_from_strong_branchin #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE -template class pseudo_costs_t; +template class pseudo_costs_t; template class pseudo_costs_t; template class pseudo_cost_snapshot_t; diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index d980cd2767..ccba51ce97 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -71,7 +71,6 @@ struct branch_variable_t { rounding_direction_t direction; }; - template struct batch_pdlp_warm_cache_t { const raft::handle_t batch_pdlp_handle{}; @@ -110,32 +109,32 @@ struct pseudo_cost_update_t { }; // `BnBMode` specify how we control the memory accesses: -// - If `BnBMode == branch_and_bound_mode_t::REGULAR`, then we assume that this object is shared +// - If `BnBMode == branch_and_bound_mode_t::PARALLEL`, then we assume that this object is shared // among the B&B threads, and thus, require atomics and mutexes to avoid data races. 
// - If `BnBMode == branch_and_bound_mode_t::DETERMINISTIC`, then each thread has it own pseudocost // snapshot, hence, we can disable all atomics and mutexes. -// `BnBMode` is automatically set depending if it is a `pseudo_costs_t` (REGULAR) +// `BnBMode` is automatically set depending if it is a `pseudo_costs_t` (PARALLEL) // or a `pseudo_costs_snapshot_t` (DETERMINISTIC). template + branch_and_bound_mode_t BnBMode = branch_and_bound_mode_t::PARALLEL> class pseudo_costs_t { public: // Define the types used for storing the pseudocost of each variable. // Disable or enable atomics depending on if we are in REGULAR or DETERMINISTIC modes using float_type = - std::conditional_t, f_t>; + std::conditional_t, f_t>; using int_type = - std::conditional_t, i_t>; + std::conditional_t, i_t>; // Counting the number of LP iterations might require more than an int32 can hold. - using int64_type = - std::conditional_t, int64_t>; + using int64_type = std:: + conditional_t, int64_t>; // Disable or enable mutexes depending on if we are in REGULAR or DETERMINISTIC modes using mutex_type = - std::conditional_t; + std::conditional_t; explicit pseudo_costs_t(i_t num_variables) : pseudo_cost_sum_down(num_variables), @@ -144,7 +143,8 @@ class pseudo_costs_t { pseudo_cost_num_up(num_variables), pseudo_cost_mutex_up(num_variables), pseudo_cost_mutex_down(num_variables), - AT(1, 1, 1) + AT(1, 1, 1), + pdlp_warm_cache(std::make_shared>()) { } @@ -222,7 +222,7 @@ class pseudo_costs_t { std::vector pseudo_cost_mutex_down; int64_type strong_branching_lp_iter = 0; - batch_pdlp_warm_cache_t pdlp_warm_cache; + std::shared_ptr> pdlp_warm_cache; }; template { pseudo_cost_snapshot_t(i_t num_variables) : Base(num_variables) {}; - pseudo_cost_snapshot_t(const pseudo_costs_t& other) + pseudo_cost_snapshot_t(const pseudo_costs_t& other) : Base(1) { *this = other; @@ -242,7 +242,7 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { pseudo_cost_snapshot_t(const Base& other) : Base(1) { *this = other; 
} pseudo_cost_snapshot_t& operator=( - const pseudo_costs_t& other) + const pseudo_costs_t& other) { i_t n = other.pseudo_cost_num_down.size(); Base::pseudo_cost_num_down.resize(n); diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 6b51e45c23..87689e57bb 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -56,8 +56,8 @@ class branch_and_bound_worker_t { pcgenerator_t rng; - bool recompute_basis = true; - bool recompute_bounds = true; + bool recompute_basis = true; + bool recompute_bounds = true; branch_and_bound_worker_t(i_t worker_id, const lp_problem_t& original_lp, @@ -99,9 +99,8 @@ class branch_and_bound_worker_t { const lp_problem_t& original_lp, const simplex_solver_settings_t& settings) { - internal_node = node->detach_copy(); + internal_node = node->detach_copy(); start_node = &internal_node; - start_lower = original_lp.lower; start_upper = original_lp.upper; search_strategy = type; From f31599c7d071bd0aa48932131e8753602ae8139c Mon Sep 17 00:00:00 2001 From: Nicolas Guidotti Date: Tue, 7 Apr 2026 15:08:55 +0200 Subject: [PATCH 05/53] added missing header Signed-off-by: Nicolas Guidotti --- cpp/src/branch_and_bound/pseudo_costs.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index ccba51ce97..61ab60c953 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include From 202738fc8ab4d0bc7ea923edb70bb608cf2e43ba Mon Sep 17 00:00:00 2001 From: "Nicolas L. 
Guidotti" Date: Tue, 7 Apr 2026 15:19:23 +0200 Subject: [PATCH 06/53] fixed guard against no incumbent when calling guided diving Signed-off-by: Nicolas Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index b60548a525..bea394e717 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1122,8 +1122,13 @@ struct deterministic_diving_policy_t return line_search_diving(fractional, x, *this->worker.root_solution, log); case search_strategy_t::GUIDED_DIVING: - return guided_diving( - this->worker.pc_snapshot, fractional, x, this->worker.incumbent_snapshot, log); + if (this->worker.incumbent_snapshot.empty()) { + return pseudocost_diving( + this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); + } else { + return guided_diving( + this->worker.pc_snapshot, fractional, x, this->worker.incumbent_snapshot, log); + } case search_strategy_t::COEFFICIENT_DIVING: { return coefficient_diving(this->worker.leaf_problem, From 4aed76c85fd86aeef59cdaaf3532db535f20196c Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 7 Apr 2026 16:28:03 +0200 Subject: [PATCH 07/53] addressing code rabbit comments. replaced AT in pseudo_costs_t with a shared_ptr to avoid unnecessary copy. Signed-off-by: Nicolas L. 
Guidotti --- .../deterministic_workers.hpp | 2 +- cpp/src/branch_and_bound/pseudo_costs.cpp | 21 ++++++++----------- cpp/src/branch_and_bound/pseudo_costs.hpp | 9 ++++++-- cpp/src/branch_and_bound/worker_pool.hpp | 13 +----------- 4 files changed, 18 insertions(+), 27 deletions(-) diff --git a/cpp/src/branch_and_bound/deterministic_workers.hpp b/cpp/src/branch_and_bound/deterministic_workers.hpp index acafe329f9..a5c3769126 100644 --- a/cpp/src/branch_and_bound/deterministic_workers.hpp +++ b/cpp/src/branch_and_bound/deterministic_workers.hpp @@ -74,7 +74,7 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t { // Diving-specific snapshots (ignored by BFS workers) std::vector incumbent_snapshot; - i_t total_lp_iters_snapshot{0}; + int64_t total_lp_iters_snapshot{0}; std::vector> integer_solutions; int next_solution_seq{0}; diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index a7e38ed9cd..cf67a69046 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -1055,7 +1055,7 @@ void strong_branching(const lp_problem_t& original_lp, basic_list, nonbasic_list, fractional, - pc.AT, + *pc.AT, basis_factors, strong_branch_down, strong_branch_up); @@ -1245,9 +1245,9 @@ f_t pseudo_costs_t::calculate_pseudocost_score( { constexpr f_t eps = 1e-6; i_t num_up = pseudo_cost_num_up[j]; - i_t sum_up = pseudo_cost_sum_up[j]; + f_t sum_up = pseudo_cost_sum_up[j]; i_t num_down = pseudo_cost_num_down[j]; - i_t sum_down = pseudo_cost_sum_down[j]; + f_t sum_down = pseudo_cost_sum_down[j]; f_t pc_up = num_up > 0 ? sum_up / num_up : averages.up_avg; f_t pc_down = num_down > 0 ? 
sum_down / num_down : averages.down_avg; f_t f_down = solution[j] - std::floor(solution[j]); @@ -1282,17 +1282,14 @@ pseudo_cost_averages_t pseudo_costs_t::compute_aver f_t pseudo_cost_up_avg = 0.0; for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { - if (pseudo_cost_num_down[j] > 0) { + if (pseudo_cost_num_down[j] > 0 && std::isfinite(pseudo_cost_sum_down[j])) { ++num_initialized_down; - if (std::isfinite(pseudo_cost_sum_down[j])) { - pseudo_cost_down_avg += pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; - } + pseudo_cost_down_avg += pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; } - if (pseudo_cost_num_up[j] > 0) { + + if (pseudo_cost_num_up[j] > 0 && std::isfinite(pseudo_cost_sum_up[j])) { ++num_initialized_up; - if (std::isfinite(pseudo_cost_sum_up[j])) { - pseudo_cost_up_avg += pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; - } + pseudo_cost_up_avg += pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; } } @@ -1504,7 +1501,7 @@ i_t pseudo_costs_t::reliable_variable_selection( objective_change_estimate_t estimate = single_pivot_objective_change_estimate(worker->leaf_problem, settings, - AT, + *AT, node_ptr->vstatus, j, basic_map[j], diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 61ab60c953..5db0d573a2 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -144,7 +144,7 @@ class pseudo_costs_t { pseudo_cost_num_up(num_variables), pseudo_cost_mutex_up(num_variables), pseudo_cost_mutex_down(num_variables), - AT(1, 1, 1), + AT(std::make_shared>(1, 1, 1)), pdlp_warm_cache(std::make_shared>()) { } @@ -214,7 +214,7 @@ class pseudo_costs_t { reliability_branching_settings_t reliability_branching_settings; - csc_matrix_t AT; // Transpose of the constraint matrix A + std::shared_ptr> AT; // Transpose of the constraint matrix A std::vector pseudo_cost_sum_up; std::vector pseudo_cost_sum_down; std::vector pseudo_cost_num_up; @@ -245,6 +245,9 @@ class 
pseudo_cost_snapshot_t : public pseudo_costs_t { pseudo_cost_snapshot_t& operator=( const pseudo_costs_t& other) { + Base::AT = other.AT; + Base::pdlp_warm_cache = other.pdlp_warm_cache; + i_t n = other.pseudo_cost_num_down.size(); Base::pseudo_cost_num_down.resize(n); Base::pseudo_cost_num_up.resize(n); @@ -264,6 +267,8 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { pseudo_cost_snapshot_t& operator=(const Base& other) { if (this != &other) { + Base::AT = other.AT; + Base::pdlp_warm_cache = other.pdlp_warm_cache; Base::pseudo_cost_num_down = other.pseudo_cost_num_down; Base::pseudo_cost_num_up = other.pseudo_cost_num_up; Base::pseudo_cost_sum_down = other.pseudo_cost_sum_down; diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index 2396b88914..2b52b6e7bf 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -80,17 +80,6 @@ class branch_and_bound_worker_pool_t { i_t num_idle_workers() { return num_idle_workers_; } - void broadcast_root_bounds_change() - { - if (is_initialized) { - for (i_t i = 0; i < workers_.size(); ++i) { - if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { - workers_[i]->start_bounds_updated = true; - } - } - } - } - private: // Worker pool std::vector>> workers_; @@ -138,4 +127,4 @@ std::array get_max_workers( return max_num_workers; } -} // namespace cuopt::linear_programming::dual_simplex \ No newline at end of file +} // namespace cuopt::linear_programming::dual_simplex From a5c111d53828b258c8957008e7a4e1a3ba5d32c8 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 7 Apr 2026 16:34:50 +0200 Subject: [PATCH 08/53] missing dereference Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index bea394e717..56203cec72 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -2510,7 +2510,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); pc_.resize(original_lp_.num_cols); - original_lp_.A.transpose(pc_.AT); + original_lp_.A.transpose(*pc_.AT); { raft::common::nvtx::range scope_sb("BB::strong_branching"); strong_branching(original_lp_, From 76ce1bb215e8af4208427dc6c7f45126e73033ad Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 8 Apr 2026 10:47:33 +0200 Subject: [PATCH 09/53] split best-first and diving worker into separated objects Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 70 ++++++++------ cpp/src/branch_and_bound/branch_and_bound.hpp | 11 ++- cpp/src/branch_and_bound/worker.hpp | 92 +++++++++++-------- cpp/src/branch_and_bound/worker_pool.hpp | 31 +++++-- 4 files changed, 124 insertions(+), 80 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 56203cec72..79bb57aa75 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -300,7 +300,7 @@ f_t branch_and_bound_t::get_lower_bound() f_t lower_bound = lower_bound_ceiling_.load(); f_t heap_lower_bound = node_queue_.get_lower_bound(); lower_bound = std::min(heap_lower_bound, lower_bound); - lower_bound = std::min(worker_pool_.get_lower_bound(), lower_bound); + lower_bound = std::min(bfs_worker_pool_.get_lower_bound(), lower_bound); if (std::isfinite(lower_bound)) { return lower_bound; @@ -841,7 +841,7 @@ branch_variable_t 
branch_and_bound_t::variable_selection( exploration_stats_, settings_, upper_bound_, - worker_pool_.num_idle_workers(), + bfs_worker_pool_.num_idle_workers(), log, new_slacks_, original_lp_); @@ -1435,7 +1435,7 @@ dual::status_t branch_and_bound_t::solve_node_lp( return lp_status; } template -void branch_and_bound_t::plunge_with(branch_and_bound_worker_t* worker) +void branch_and_bound_t::plunge_with(bfs_worker_t* worker) { std::deque*> stack; stack.push_front(worker->start_node); @@ -1557,13 +1557,13 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t 1) { - worker_pool_.return_worker_to_pool(worker); + bfs_worker_pool_.return_worker_to_pool(worker); active_workers_per_strategy_[BEST_FIRST]--; } } template -void branch_and_bound_t::dive_with(branch_and_bound_worker_t* worker) +void branch_and_bound_t::dive_with(diving_worker_t* worker) { raft::common::nvtx::range scope("BB::diving_thread"); logger_t log; @@ -1647,7 +1647,7 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); } - worker_pool_.return_worker_to_pool(worker); + diving_worker_pool_.return_worker_to_pool(worker); active_workers_per_strategy_[search_strategy]--; } @@ -1662,7 +1662,11 @@ void branch_and_bound_t::run_scheduler() std::array max_num_workers_per_type = get_max_workers(num_workers, strategies); - worker_pool_.init(num_workers, original_lp_, Arow_, var_types_, settings_); + const i_t num_bfs_workers = max_num_workers_per_type[BEST_FIRST]; + const i_t num_diving_workers = num_workers - num_bfs_workers; + bfs_worker_pool_.init(num_bfs_workers, original_lp_, Arow_, var_types_, settings_, 0); + diving_worker_pool_.init( + num_diving_workers, original_lp_, Arow_, var_types_, settings_, num_bfs_workers); active_workers_per_strategy_.fill(0); #ifdef CUOPT_LOG_DEBUG @@ -1732,55 +1736,61 @@ void branch_and_bound_t::run_scheduler() continue; } - // Get an idle worker. 
- branch_and_bound_worker_t* worker = worker_pool_.get_idle_worker(); - if (worker == nullptr) { break; } - if (strategy == BEST_FIRST) { + // Get an idle worker. + bfs_worker_t* bfs_worker = bfs_worker_pool_.get_idle_worker(); + if (bfs_worker == nullptr) { continue; } + // If there any node left in the heap, we pop the top node and explore it. std::optional*> start_node = node_queue_.pop_best_first(); if (!start_node.has_value()) { continue; } - if (get_cutoff() < start_node.value()->lower_bound) { + mip_node_t* node_ptr = start_node.value(); + + if (get_cutoff() < node_ptr->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound - search_tree_.graphviz_node( - settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); - search_tree_.update(start_node.value(), node_status_t::FATHOMED); + search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); + search_tree_.update(node_ptr, node_status_t::FATHOMED); continue; } // Remove the worker from the idle list. - worker_pool_.pop_idle_worker(); - worker->init_best_first(start_node.value(), original_lp_); - last_node_depth = start_node.value()->depth; - last_int_infeas = start_node.value()->integer_infeasible; + bfs_worker_pool_.pop_idle_worker(); + bfs_worker->init(node_ptr, original_lp_); + last_node_depth = node_ptr->depth; + last_int_infeas = node_ptr->integer_infeasible; active_workers_per_strategy_[strategy]++; launched_any_task = true; -#pragma omp task affinity(worker) - plunge_with(worker); +#pragma omp task affinity(bfs_worker) + plunge_with(bfs_worker); } else { + // Get an idle worker. 
+ diving_worker_t* diving_worker = diving_worker_pool_.get_idle_worker(); + if (diving_worker == nullptr) { continue; } + std::optional*> start_node = node_queue_.pop_diving(); if (!start_node.has_value()) { continue; } - if (get_cutoff() < start_node.value()->lower_bound || - start_node.value()->depth < diving_settings.min_node_depth) { + mip_node_t* node_ptr = start_node.value(); + + if (get_cutoff() < node_ptr->lower_bound || + node_ptr->depth < diving_settings.min_node_depth) { continue; } - bool is_feasible = - worker->init_diving(start_node.value(), strategy, original_lp_, settings_); + bool is_feasible = diving_worker->init(node_ptr, strategy, original_lp_, settings_); if (!is_feasible) { continue; } // Remove the worker from the idle list. - worker_pool_.pop_idle_worker(); + diving_worker_pool_.pop_idle_worker(); active_workers_per_strategy_[strategy]++; launched_any_task = true; -#pragma omp task affinity(worker) - dive_with(worker); +#pragma omp task affinity(diving_worker) + dive_with(diving_worker); } } @@ -1804,7 +1814,7 @@ void branch_and_bound_t::run_scheduler() template void branch_and_bound_t::single_threaded_solve() { - branch_and_bound_worker_t worker(0, original_lp_, Arow_, var_types_, settings_); + bfs_worker_t worker(0, original_lp_, Arow_, var_types_, settings_, 0); f_t lower_bound = get_lower_bound(); f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); @@ -1848,7 +1858,7 @@ void branch_and_bound_t::single_threaded_solve() continue; } - worker.init_best_first(start_node.value(), original_lp_); + worker.init(start_node.value(), original_lp_); plunge_with(&worker); lower_bound = get_lower_bound(); diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 60904406f7..25bad6bebe 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -245,8 +245,11 @@ class branch_and_bound_t { // are waiting to be 
executed. std::array, num_search_strategies> active_workers_per_strategy_; - // Worker pool - branch_and_bound_worker_pool_t worker_pool_; + // Worker pool dedicated to the best-first search + bfs_worker_pool_t bfs_worker_pool_; + + // Worker pool dedicated to diving + diving_worker_pool_t diving_worker_pool_; // Global status of the solver. omp_atomic_t solver_status_; @@ -291,11 +294,11 @@ class branch_and_bound_t { // We use best-first to pick the `start_node` and then perform a depth-first search // from this node (i.e., a plunge). It can only backtrack to a sibling node. // Unexplored nodes in the subtree are inserted back into the global heap. - void plunge_with(branch_and_bound_worker_t* worker); + void plunge_with(bfs_worker_t* worker); // Perform a deep dive in the subtree determined by the `start_node` in order // to find integer feasible solutions. - void dive_with(branch_and_bound_worker_t* worker); + void dive_with(diving_worker_t* worker); // Run the scheduler whose will schedule and manage // all the other workers. 
diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 87689e57bb..d4f5d2fbd8 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -34,7 +34,10 @@ struct branch_and_bound_stats_t { template class branch_and_bound_worker_t { public: - const i_t worker_id; + using float_type = f_t; + using int_type = i_t; + + i_t worker_id; omp_atomic_t search_strategy; omp_atomic_t is_active; omp_atomic_t lower_bound; @@ -63,7 +66,8 @@ class branch_and_bound_worker_t { const lp_problem_t& original_lp, const csr_matrix_t& Arow, const std::vector& var_type, - const simplex_solver_settings_t& settings) + const simplex_solver_settings_t& settings, + const i_t rng_offset) : worker_id(worker_id), search_strategy(BEST_FIRST), is_active(false), @@ -75,41 +79,10 @@ class branch_and_bound_worker_t { nonbasic_list(), node_presolver(leaf_problem, Arow, {}, var_type), bounds_changed(original_lp.num_cols, false), + start_node(nullptr), rng(settings.random_seed + pcgenerator_t::default_seed + worker_id, - pcgenerator_t::default_stream ^ worker_id) - { - } - - // Set the `start_node` for best-first search. - void init_best_first(mip_node_t* node, const lp_problem_t& original_lp) - { - start_node = node; - start_lower = original_lp.lower; - start_upper = original_lp.upper; - search_strategy = BEST_FIRST; - lower_bound = node->lower_bound; - is_active = true; - } - - // Initialize the worker for diving, setting the `start_node`, `start_lower` and - // `start_upper`. Returns `true` if the starting node is feasible via - // bounds propagation. 
- bool init_diving(mip_node_t* node, - search_strategy_t type, - const lp_problem_t& original_lp, - const simplex_solver_settings_t& settings) + rng_offset + pcgenerator_t::default_stream ^ worker_id) { - internal_node = node->detach_copy(); - start_node = &internal_node; - start_lower = original_lp.lower; - start_upper = original_lp.upper; - search_strategy = type; - lower_bound = node->lower_bound; - is_active = true; - - std::fill(bounds_changed.begin(), bounds_changed.end(), false); - node->get_variable_bounds(start_lower, start_upper, bounds_changed); - return node_presolver.bounds_strengthening(settings, bounds_changed, start_lower, start_upper); } // Set the variables bounds for the LP relaxation in the current node. @@ -133,8 +106,55 @@ class branch_and_bound_worker_t { return node_presolver.bounds_strengthening( settings, bounds_changed, leaf_problem.lower, leaf_problem.upper); } +}; + +template +class bfs_worker_t : public branch_and_bound_worker_t { + public: + using Base = branch_and_bound_worker_t; + using Base::Base; + + // Set the `start_node` for best-first search. + void init(mip_node_t* node, const lp_problem_t& original_lp) + { + Base::start_node = node; + Base::start_lower = original_lp.lower; + Base::start_upper = original_lp.upper; + Base::search_strategy = BEST_FIRST; + Base::lower_bound = node->lower_bound; + Base::is_active = true; + } +}; + +template +class diving_worker_t : public branch_and_bound_worker_t { + public: + using Base = branch_and_bound_worker_t; + using Base::Base; + + // Initialize the worker for diving, setting the `start_node`, `start_lower` and + // `start_upper`. Returns `true` if the starting node is feasible via + // bounds propagation. 
+ bool init(mip_node_t* node, + search_strategy_t type, + const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings) + { + internal_node = node->detach_copy(); + Base::start_node = &internal_node; + Base::start_lower = original_lp.lower; + Base::start_upper = original_lp.upper; + Base::search_strategy = type; + Base::lower_bound = node->lower_bound; + Base::is_active = true; + + std::fill(Base::bounds_changed.begin(), Base::bounds_changed.end(), false); + node->get_variable_bounds(Base::start_lower, Base::start_upper, Base::bounds_changed); + return Base::node_presolver.bounds_strengthening( + settings, Base::bounds_changed, Base::start_lower, Base::start_upper); + } - private: + protected: // For diving, we need to store the full node instead of // of just a pointer, since it is not stored in the tree anymore. // To keep the same interface across all worker types, diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index 2b52b6e7bf..745c61d8ae 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -11,20 +11,24 @@ namespace cuopt::linear_programming::dual_simplex { -template -class branch_and_bound_worker_pool_t { +template +class worker_pool_t { public: + using i_t = WorkerType::int_type; + using f_t = WorkerType::float_type; + void init(i_t num_workers, const lp_problem_t& original_lp, const csr_matrix_t& Arow, const std::vector& var_type, - const simplex_solver_settings_t& settings) + const simplex_solver_settings_t& settings, + const i_t rng_offset) { workers_.resize(num_workers); num_idle_workers_ = num_workers; for (i_t i = 0; i < num_workers; ++i) { - workers_[i] = std::make_unique>( - i, original_lp, Arow, var_type, settings); + workers_[i] = + std::make_unique(i, original_lp, Arow, var_type, settings, rng_offset); idle_workers_.push_front(i); } @@ -33,7 +37,7 @@ class branch_and_bound_worker_pool_t { // Here, we are assuming that the scheduler 
is the only // thread that can retrieve/pop an idle worker. - branch_and_bound_worker_t* get_idle_worker() + WorkerType* get_idle_worker() { std::lock_guard lock(mutex_); if (idle_workers_.empty()) { @@ -55,7 +59,7 @@ class branch_and_bound_worker_pool_t { } } - void return_worker_to_pool(branch_and_bound_worker_t* worker) + void return_worker_to_pool(WorkerType* worker) { worker->is_active = false; std::lock_guard lock(mutex_); @@ -69,7 +73,7 @@ class branch_and_bound_worker_pool_t { if (is_initialized) { for (i_t i = 0; i < workers_.size(); ++i) { - if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { + if (workers_[i]->is_active) { lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); } } @@ -78,11 +82,12 @@ class branch_and_bound_worker_pool_t { return lower_bound; } - i_t num_idle_workers() { return num_idle_workers_; } + i_t num_idle_workers() const { return num_idle_workers_; } + i_t num_workers() const { return workers_.size(); } private: // Worker pool - std::vector>> workers_; + std::vector> workers_; bool is_initialized = false; omp_mutex_t mutex_; @@ -127,4 +132,10 @@ std::array get_max_workers( return max_num_workers; } +template +using bfs_worker_pool_t = worker_pool_t>; + +template +using diving_worker_pool_t = worker_pool_t>; + } // namespace cuopt::linear_programming::dual_simplex From c433e41e034e794c1defd8c1be45e00d59dc88c3 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 8 Apr 2026 10:54:27 +0200 Subject: [PATCH 10/53] increase the wheel size limit Signed-off-by: Nicolas L. 
Guidotti --- ci/validate_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 79188cacc3..3a1dd08ad4 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -26,7 +26,7 @@ if [[ "${package_dir}" == "python/libcuopt" ]]; then ) else PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '495Mi' + --max-allowed-size-compressed '520Mi' ) fi elif [[ "${package_dir}" != "python/cuopt" ]] && \ From 52db538fd30dbafb66ebb43523b8039db12e7b2a Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 8 Apr 2026 11:36:34 +0200 Subject: [PATCH 11/53] fixed rng offset Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 4 ++-- cpp/src/branch_and_bound/worker.hpp | 6 +++--- cpp/src/branch_and_bound/worker_pool.hpp | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 79bb57aa75..5c5e788174 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1664,7 +1664,7 @@ void branch_and_bound_t::run_scheduler() const i_t num_bfs_workers = max_num_workers_per_type[BEST_FIRST]; const i_t num_diving_workers = num_workers - num_bfs_workers; - bfs_worker_pool_.init(num_bfs_workers, original_lp_, Arow_, var_types_, settings_, 0); + bfs_worker_pool_.init(num_bfs_workers, original_lp_, Arow_, var_types_, settings_); diving_worker_pool_.init( num_diving_workers, original_lp_, Arow_, var_types_, settings_, num_bfs_workers); active_workers_per_strategy_.fill(0); @@ -1814,7 +1814,7 @@ void branch_and_bound_t::run_scheduler() template void branch_and_bound_t::single_threaded_solve() { - bfs_worker_t worker(0, original_lp_, Arow_, var_types_, settings_, 0); + bfs_worker_t worker(0, original_lp_, Arow_, var_types_, settings_); f_t lower_bound = get_lower_bound(); f_t abs_gap = compute_user_abs_gap(original_lp_, 
upper_bound_.load(), lower_bound); diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index d4f5d2fbd8..65fe5c32dd 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -67,7 +67,7 @@ class branch_and_bound_worker_t { const csr_matrix_t& Arow, const std::vector& var_type, const simplex_solver_settings_t& settings, - const i_t rng_offset) + const uint64_t rng_offset = 0) : worker_id(worker_id), search_strategy(BEST_FIRST), is_active(false), @@ -80,8 +80,8 @@ class branch_and_bound_worker_t { node_presolver(leaf_problem, Arow, {}, var_type), bounds_changed(original_lp.num_cols, false), start_node(nullptr), - rng(settings.random_seed + pcgenerator_t::default_seed + worker_id, - rng_offset + pcgenerator_t::default_stream ^ worker_id) + rng(settings.random_seed + pcgenerator_t::default_seed + rng_offset + worker_id, + pcgenerator_t::default_stream ^ (worker_id + rng_offset)) { } diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index 745c61d8ae..8397cd6174 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -22,7 +22,7 @@ class worker_pool_t { const csr_matrix_t& Arow, const std::vector& var_type, const simplex_solver_settings_t& settings, - const i_t rng_offset) + const uint64_t rng_offset = 0) { workers_.resize(num_workers); num_idle_workers_ = num_workers; From 3676432d07d22a170e6d862510631f769e51d090 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 8 Apr 2026 11:37:31 +0200 Subject: [PATCH 12/53] increasing wheel size limit for CUDA 12 Signed-off-by: Nicolas L. 
Guidotti --- ci/validate_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 3a1dd08ad4..2dc95a23a1 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -22,7 +22,7 @@ PYDISTCHECK_ARGS=( if [[ "${package_dir}" == "python/libcuopt" ]]; then if [[ "${RAPIDS_CUDA_MAJOR}" == "12" ]]; then PYDISTCHECK_ARGS+=( - --max-allowed-size-compressed '650Mi' + --max-allowed-size-compressed '660Mi' ) else PYDISTCHECK_ARGS+=( From d2f6eb74fb9bc66bd8834eba29d262cbd1e6e1fe Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 8 Apr 2026 18:36:28 +0200 Subject: [PATCH 13/53] first version of the B&B workers with local heaps Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 335 ++++++++++-------- cpp/src/branch_and_bound/branch_and_bound.hpp | 11 +- .../deterministic_workers.hpp | 4 +- cpp/src/branch_and_bound/node_queue.hpp | 45 ++- cpp/src/branch_and_bound/worker.hpp | 59 +-- cpp/src/branch_and_bound/worker_pool.hpp | 36 +- 6 files changed, 277 insertions(+), 213 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 5c5e788174..45cf491215 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -257,7 +257,9 @@ branch_and_bound_t::branch_and_bound_t( root_relax_soln_(1, 1), root_crossover_soln_(1, 1), pc_(1), - solver_status_(mip_status_t::UNSET) + solver_status_(mip_status_t::UNSET), + rng_(settings_.random_seed ^ pcgenerator_t::default_seed, + settings_.random_seed ^ pcgenerator_t::default_stream ^ settings_.random_seed) { exploration_stats_.start_time = start_time; #ifdef PRINT_CONSTRAINT_MATRIX @@ -297,10 +299,8 @@ branch_and_bound_t::branch_and_bound_t( template f_t branch_and_bound_t::get_lower_bound() { - f_t lower_bound = lower_bound_ceiling_.load(); - f_t heap_lower_bound = node_queue_.get_lower_bound(); - lower_bound = 
std::min(heap_lower_bound, lower_bound); - lower_bound = std::min(bfs_worker_pool_.get_lower_bound(), lower_bound); + f_t lower_bound = lower_bound_ceiling_.load(); + lower_bound = std::min(bfs_worker_pool_.get_lower_bound(), lower_bound); if (std::isfinite(lower_bound)) { return lower_bound; @@ -1434,13 +1434,13 @@ dual::status_t branch_and_bound_t::solve_node_lp( return lp_status; } + template -void branch_and_bound_t::plunge_with(bfs_worker_t* worker) +void branch_and_bound_t::plunge_with(bfs_worker_t* worker, + mip_node_t* start_node) { std::deque*> stack; - stack.push_front(worker->start_node); - worker->recompute_basis = true; - worker->recompute_bounds = true; + stack.push_front(start_node); f_t lower_bound = get_lower_bound(); f_t upper_bound = upper_bound_; @@ -1458,7 +1458,9 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker) // - The current node and its siblings uses the lower bound of the parent before solving the LP // relaxation // - The lower bound of the parent is lower or equal to its children - worker->lower_bound = node_ptr->lower_bound; + worker->lower_bound = node_ptr->lower_bound; + worker->node_depth = node_ptr->depth; + worker->integer_infeasible = node_ptr->integer_infeasible; if (node_ptr->lower_bound > get_cutoff()) { search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); @@ -1509,22 +1511,22 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker) if (stack.size() > 0) { mip_node_t* node = stack.back(); stack.pop_back(); - node_queue_.push(node); + worker->node_queue.push(node); } exploration_stats_.nodes_unexplored += 2; if (round_dir == rounding_direction_t::UP) { - if (node_queue_.best_first_queue_size() < min_node_queue_size_) { - node_queue_.push(node_ptr->get_down_child()); + if (worker->node_queue.best_first_queue_size() < min_node_queue_size_) { + worker->node_queue.push(node_ptr->get_down_child()); } else { stack.push_front(node_ptr->get_down_child()); } 
stack.push_front(node_ptr->get_up_child()); } else { - if (node_queue_.best_first_queue_size() < min_node_queue_size_) { - node_queue_.push(node_ptr->get_up_child()); + if (worker->node_queue.best_first_queue_size() < min_node_queue_size_) { + worker->node_queue.push(node_ptr->get_up_child()); } else { stack.push_front(node_ptr->get_up_child()); } @@ -1539,11 +1541,6 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker) abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); } - lower_bound = get_lower_bound(); - upper_bound = upper_bound_; - rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); - if (stack.size() > 0 && (rel_gap <= settings_.relative_mip_gap_tol || abs_gap <= settings_.absolute_mip_gap_tol)) { // If the solver converged according to the gap rules, but we still have nodes to explore @@ -1552,14 +1549,48 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker) while (!stack.empty()) { auto node = stack.front(); stack.pop_front(); - node_queue_.push(node); + worker->node_queue.push(node); } } +} - if (settings_.num_threads > 1) { - bfs_worker_pool_.return_worker_to_pool(worker); - active_workers_per_strategy_[BEST_FIRST]--; +template +void branch_and_bound_t::best_first_search_with(bfs_worker_t* worker) +{ + f_t lower_bound = get_lower_bound(); + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && + worker->node_queue.best_first_queue_size() > 0) { + worker->recompute_basis = true; + worker->recompute_bounds = true; + + worker->node_queue.lock(); + mip_node_t* start_node = worker->node_queue.pop_best_first(); + worker->lower_bound = start_node->lower_bound; + worker->node_depth = 
start_node->depth; + worker->integer_infeasible = start_node->integer_infeasible; + worker->node_queue.unlock(); + + if (get_cutoff() < start_node->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree_.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); + search_tree_.update(start_node, node_status_t::FATHOMED); + continue; + } + + plunge_with(worker, start_node); + + lower_bound = get_lower_bound(); + abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); + rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); } + + bfs_worker_pool_.return_worker_to_pool(worker); + active_workers_per_strategy_[BEST_FIRST]--; } template @@ -1576,7 +1607,7 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) worker->recompute_basis = true; worker->recompute_bounds = true; - search_tree_t dive_tree(std::move(*worker->start_node)); + search_tree_t dive_tree(std::move(worker->start_node)); std::deque*> stack; stack.push_front(&dive_tree.root); @@ -1596,7 +1627,9 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) mip_node_t* node_ptr = stack.front(); stack.pop_front(); - worker->lower_bound = node_ptr->lower_bound; + worker->lower_bound = node_ptr->lower_bound; + worker->node_depth = node_ptr->depth; + worker->integer_infeasible = node_ptr->integer_infeasible; if (node_ptr->lower_bound > get_cutoff()) { worker->recompute_basis = true; @@ -1651,6 +1684,21 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) active_workers_per_strategy_[search_strategy]--; } +template +void branch_and_bound_t::start_bfs_worker(mip_node_t* start_node) +{ + bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); + if (!idle_worker) { return; } + + idle_worker->node_queue.push(start_node); + idle_worker->lower_bound = start_node->lower_bound; + idle_worker->is_active = true; + 
active_workers_per_strategy_[BEST_FIRST]++; + +#pragma omp task affinity(idle_worker) + best_first_search_with(idle_worker); +} + template void branch_and_bound_t::run_scheduler() { @@ -1678,17 +1726,15 @@ void branch_and_bound_t::run_scheduler() } #endif - f_t lower_bound = get_lower_bound(); - f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - i_t last_node_depth = 0; - i_t last_int_infeas = 0; + f_t lower_bound = get_lower_bound(); + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + start_bfs_worker(search_tree_.root.get_up_child()); + start_bfs_worker(search_tree_.root.get_down_child()); while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && - rel_gap > settings_.relative_mip_gap_tol && - (active_workers_per_strategy_[0] > 0 || node_queue_.best_first_queue_size() > 0)) { - bool launched_any_task = false; - + rel_gap > settings_.relative_mip_gap_tol && active_workers_per_strategy_[0] > 0) { repair_heuristic_solutions(); // If the guided diving was disabled previously due to the lack of an incumbent solution, @@ -1710,7 +1756,8 @@ void branch_and_bound_t::run_scheduler() } } - f_t now = toc(exploration_stats_.start_time); + std::vector*> active_workers = bfs_worker_pool_.get_active_workers(); + f_t now = toc(exploration_stats_.start_time); f_t time_since_last_log = exploration_stats_.last_log == 0 ? 
1.0 : toc(exploration_stats_.last_log); i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; @@ -1718,10 +1765,16 @@ void branch_and_bound_t::run_scheduler() if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && time_since_last_log >= 1) || (time_since_last_log > 30) || now > settings_.time_limit) { - i_t queue_size = node_queue_.best_first_queue_size(); - i_t depth = queue_size > 0 ? node_queue_.bfs_top()->depth : last_node_depth; - i_t int_infeas = queue_size > 0 ? node_queue_.bfs_top()->integer_infeasible : last_int_infeas; - report(' ', upper_bound_, lower_bound, depth, int_infeas); + i_t node_depth = std::numeric_limits::max(); + i_t int_infeas = 0; + for (auto* worker : active_workers) { + if (worker->is_active) { + node_depth = std::min(node_depth, worker->node_depth.load()); + int_infeas = std::max(int_infeas, worker->integer_infeasible.load()); + } + } + + report(' ', upper_bound_, lower_bound, node_depth, int_infeas); exploration_stats_.last_log = tic(); exploration_stats_.nodes_since_last_log = 0; } @@ -1731,63 +1784,55 @@ void branch_and_bound_t::run_scheduler() break; } - for (auto strategy : strategies) { - if (active_workers_per_strategy_[strategy] >= max_num_workers_per_type[strategy]) { - continue; + for (int k = 0; k < active_workers.size() && bfs_worker_pool_.num_idle_workers() > 0; ++k) { + bfs_worker_t* worker = active_workers[k]; + if (worker->is_active && worker->node_queue.best_first_queue_size() > 1) { + worker->node_queue.lock(); + mip_node_t* node = worker->node_queue.pop_best_first(); + start_bfs_worker(node); + worker->node_queue.unlock(); } + } - if (strategy == BEST_FIRST) { - // Get an idle worker. - bfs_worker_t* bfs_worker = bfs_worker_pool_.get_idle_worker(); - if (bfs_worker == nullptr) { continue; } - - // If there any node left in the heap, we pop the top node and explore it. 
- std::optional*> start_node = node_queue_.pop_best_first(); - - if (!start_node.has_value()) { continue; } - mip_node_t* node_ptr = start_node.value(); + for (int i = 1; i < strategies.size(); ++i) { + auto strategy = strategies[i]; - if (get_cutoff() < node_ptr->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the - // current upper bound - search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); - search_tree_.update(node_ptr, node_status_t::FATHOMED); - continue; - } + for (int k = 0; k < active_workers.size(); ++k) { + if (active_workers_per_strategy_[strategy] > max_num_workers_per_type[strategy]) { break; } - // Remove the worker from the idle list. - bfs_worker_pool_.pop_idle_worker(); - bfs_worker->init(node_ptr, original_lp_); - last_node_depth = node_ptr->depth; - last_int_infeas = node_ptr->integer_infeasible; - active_workers_per_strategy_[strategy]++; - launched_any_task = true; + bfs_worker_t* bfs_worker = active_workers[k]; + if (!bfs_worker->is_active) { continue; } -#pragma omp task affinity(bfs_worker) - plunge_with(bfs_worker); - - } else { // Get an idle worker. 
- diving_worker_t* diving_worker = diving_worker_pool_.get_idle_worker(); + diving_worker_t* diving_worker = diving_worker_pool_.pop_idle_worker(); if (diving_worker == nullptr) { continue; } - std::optional*> start_node = node_queue_.pop_diving(); + bfs_worker->node_queue.lock(); + mip_node_t* start_node = bfs_worker->node_queue.pop_diving(); - if (!start_node.has_value()) { continue; } - mip_node_t* node_ptr = start_node.value(); + if (!start_node) { + diving_worker_pool_.return_worker_to_pool(diving_worker); + bfs_worker->node_queue.unlock(); + continue; + } - if (get_cutoff() < node_ptr->lower_bound || - node_ptr->depth < diving_settings.min_node_depth) { + if (get_cutoff() < start_node->lower_bound || + start_node->depth < diving_settings.min_node_depth) { + diving_worker_pool_.return_worker_to_pool(diving_worker); + bfs_worker->node_queue.unlock(); continue; } - bool is_feasible = diving_worker->init(node_ptr, strategy, original_lp_, settings_); - if (!is_feasible) { continue; } + bool is_feasible = diving_worker->init(start_node, strategy, original_lp_, settings_); + bfs_worker->node_queue.unlock(); + + if (!is_feasible) { + diving_worker_pool_.return_worker_to_pool(diving_worker); + continue; + } // Remove the worker from the idle list. - diving_worker_pool_.pop_idle_worker(); active_workers_per_strategy_[strategy]++; - launched_any_task = true; #pragma omp task affinity(diving_worker) dive_with(diving_worker); @@ -1804,25 +1849,26 @@ void branch_and_bound_t::run_scheduler() break; } - // If no new task was launched in this iteration, suspend temporarily the - // execution of the scheduler. As of 8/Jan/2026, GCC does not - // implement taskyield, but LLVM does. - if (!launched_any_task) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + // // If no new task was launched in this iteration, suspend temporarily the + // // execution of the scheduler. As of 8/Jan/2026, GCC does not + // // implement taskyield, but LLVM does. 
+ // if (!launched_any_task) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } } } template void branch_and_bound_t::single_threaded_solve() { - bfs_worker_t worker(0, original_lp_, Arow_, var_types_, settings_); + bfs_worker_pool_.init(1, original_lp_, Arow_, var_types_, settings_); + bfs_worker_t* worker = bfs_worker_pool_.get_worker(0); + node_queue_t& node_queue = worker->node_queue; f_t lower_bound = get_lower_bound(); f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && - rel_gap > settings_.relative_mip_gap_tol && node_queue_.best_first_queue_size() > 0) { - bool launched_any_task = false; + rel_gap > settings_.relative_mip_gap_tol && node_queue.best_first_queue_size() > 0) { repair_heuristic_solutions(); f_t now = toc(exploration_stats_.start_time); @@ -1833,8 +1879,8 @@ void branch_and_bound_t::single_threaded_solve() if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && time_since_last_log >= 1) || (time_since_last_log > 30) || now > settings_.time_limit) { - i_t depth = node_queue_.bfs_top()->depth; - i_t int_infeas = node_queue_.bfs_top()->integer_infeasible; + i_t depth = node_queue.bfs_top()->depth; + i_t int_infeas = node_queue.bfs_top()->integer_infeasible; report(' ', upper_bound_, lower_bound, depth, int_infeas); exploration_stats_.last_log = tic(); exploration_stats_.nodes_since_last_log = 0; @@ -1846,20 +1892,18 @@ void branch_and_bound_t::single_threaded_solve() } // If there any node left in the heap, we pop the top node and explore it. 
- std::optional*> start_node = node_queue_.pop_best_first(); + mip_node_t* start_node = node_queue.pop_best_first(); - if (!start_node.has_value()) { continue; } - if (get_cutoff() < start_node.value()->lower_bound) { + if (!start_node) { continue; } + if (get_cutoff() < start_node->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound - search_tree_.graphviz_node( - settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); - search_tree_.update(start_node.value(), node_status_t::FATHOMED); + search_tree_.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); + search_tree_.update(start_node, node_status_t::FATHOMED); continue; } - worker.init(start_node.value(), original_lp_); - plunge_with(&worker); + plunge_with(worker, start_node); lower_bound = get_lower_bound(); abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); @@ -2210,7 +2254,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (num_fractional != 0 && settings_.max_cut_passes > 0) { settings_.log.printf( - " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node | " + " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node | " + " " "Gap " "| Time |\n"); } @@ -2603,8 +2648,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_vstatus_, original_lp_, log); - node_queue_.push(search_tree_.root.get_down_child()); - node_queue_.push(search_tree_.root.get_up_child()); settings_.log.printf("Exploring the B&B tree using %d threads\n\n", settings_.num_threads); node_concurrent_halt_ = 0; @@ -2648,30 +2691,30 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut lower_bound = deterministic_compute_lower_bound(); solver_status_ = deterministic_global_termination_status_; } else { - if (node_queue_.best_first_queue_size() > 0) { - // We need to clear the queue and use the info in the search tree for 
the lower bound - while (node_queue_.best_first_queue_size() > 0) { - std::optional*> start_node = node_queue_.pop_best_first(); - - if (!start_node.has_value()) { continue; } - if (get_cutoff() < start_node.value()->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the - // current upper bound - search_tree_.graphviz_node( - settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); - search_tree_.update(start_node.value(), node_status_t::FATHOMED); - continue; - } else { - node_queue_.push( - start_node.value()); // Needed to ensure we don't lose the correct lower bound - break; - } - } - lower_bound = node_queue_.best_first_queue_size() > 0 ? node_queue_.get_lower_bound() - : search_tree_.root.lower_bound; - } else { - lower_bound = search_tree_.root.lower_bound; - } + // if (node_queue_.best_first_queue_size() > 0) { + // // We need to clear the queue and use the info in the search tree for the lower bound + // while (node_queue_.best_first_queue_size() > 0) { + // std::optional*> start_node = node_queue_.pop_best_first(); + // + // if (!start_node.has_value()) { continue; } + // if (get_cutoff() < start_node.value()->lower_bound) { + // // This node was put on the heap earlier but its lower bound is now greater than the + // // current upper bound + // search_tree_.graphviz_node( + // settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); + // search_tree_.update(start_node.value(), node_status_t::FATHOMED); + // continue; + // } else { + // node_queue_.push( + // start_node.value()); // Needed to ensure we don't lose the correct lower bound + // break; + // } + // } + // lower_bound = node_queue_.best_first_queue_size() > 0 ? node_queue_.get_lower_bound() + // : search_tree_.root.lower_bound; + // } else { + lower_bound = search_tree_.root.lower_bound; + // } } set_final_solution(solution, lower_bound); return solver_status_; @@ -2734,8 +2777,8 @@ Work Units: 0 0.5 1. 
──────────────────────────────────────────────────────────────────────────────────────────► Work Unit Time -Legend: ▓▓▓ = actively working ░░░ = waiting at barrier [hash] = state hash for verification - wut = work unit timestamp PC = pseudo-costs snap = snapshot (local copy) +Legend: ▓▓▓ = actively working ░░░ = waiting at barrier [hash] = state hash for +verification wut = work unit timestamp PC = pseudo-costs snap = snapshot (local copy) */ @@ -2773,23 +2816,22 @@ Producer Sync: Producing solutions in the past would break determinism, therefore this unidirectional sync ensures no such thing can occur. Instrumentation Aggregator: Collects multiple instrument vectors into a single aggregation point for estimating work from memory operations. Worker Context: Object -representing the "context" (e.g.: the worker) that should register the amount of work recorded There -is a 1context:1worker mapping. The Work Unit Scheduler registers such contexts and ensure they -remained synchronized together. Queued Integer Solutions: New integer solutions found within -horizons are queued with a work unit timestamp, in order to be sorted and played in order during the -sync callback. Creation Sequence: In nondeterministic mode, a single global atomic integer is used -to generate sequential IDs for the nodes. Since this is a global atomic, it is inherently +representing the "context" (e.g.: the worker) that should register the amount of work recorded +There is a 1context:1worker mapping. The Work Unit Scheduler registers such contexts and ensure +they remained synchronized together. Queued Integer Solutions: New integer solutions found within +horizons are queued with a work unit timestamp, in order to be sorted and played in order during +the sync callback. Creation Sequence: In nondeterministic mode, a single global atomic integer is +used to generate sequential IDs for the nodes. Since this is a global atomic, it is inherently nondeterministic. 
To fix this, in deterministic mode, nodes are addressed by a tuple - where "worker_id" is the ID of the worker that created this node, and "seq_id" is a sequential ID -local to the worker.\ This sequential ID is similar in principle to the global atomic ID sequence of -the nondeterminsitic mode but since it is local to each worker, it is updated serially and thus is -deterministic. worker IDs are unique, and sequence IDs are unique to their workers, therefor - is a globally unique node identifier. -Pseudocost Update: - Each worker updates its local pseudocosts when branching. These updates are queued within -horizons. During the horizon sync, these updates are all played in order, and the newly updated -global pseudocosts are broadcast to the worker's pseudocost snapshots for the coming horizon. + where "worker_id" is the ID of the worker that created this node, and "seq_id" is a sequential +ID local to the worker.\ This sequential ID is similar in principle to the global atomic ID +sequence of the nondeterminsitic mode but since it is local to each worker, it is updated serially +and thus is deterministic. worker IDs are unique, and sequence IDs are unique to their workers, +therefor is a globally unique node identifier. Pseudocost Update: Each worker +updates its local pseudocosts when branching. These updates are queued within horizons. During the +horizon sync, these updates are all played in order, and the newly updated global pseudocosts are +broadcast to the worker's pseudocost snapshots for the coming horizon. 
*/ @@ -2907,7 +2949,8 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri "Sync%% | NoWork\n"); settings_.log.printf( " " - "-------+---------+----------+--------+---------+--------+----------+----------+-------+-------" + "-------+---------+----------+--------+---------+--------+----------+----------+-------+-----" + "--" "\n"); for (const auto& worker : *deterministic_workers_) { double sync_time = worker.work_context.total_sync_time; @@ -3697,8 +3740,10 @@ void branch_and_bound_t::deterministic_assign_diving_nodes() continue; // this worker is full, try next one } - auto entry = diving_heap_.pop(); - if (entry.has_value()) { worker.enqueue_dive_node(entry.value().node, original_lp_); } + if (!diving_heap_.empty()) { + auto entry = diving_heap_.pop(); + worker.enqueue_dive_node(entry.node, original_lp_); + } } diving_heap_.clear(); diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 25bad6bebe..fecd295531 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -235,9 +235,6 @@ class branch_and_bound_t { // Pseudocosts pseudo_costs_t pc_; - // Heap storing the nodes waiting to be explored. - node_queue_t node_queue_; - // Search tree search_tree_t search_tree_; @@ -251,6 +248,8 @@ class branch_and_bound_t { // Worker pool dedicated to diving diving_worker_pool_t diving_worker_pool_; + pcgenerator_t rng_; + // Global status of the solver. omp_atomic_t solver_status_; omp_atomic_t is_running_{false}; @@ -291,10 +290,14 @@ class branch_and_bound_t { // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); + void start_bfs_worker(mip_node_t* start_node); + + void best_first_search_with(bfs_worker_t* worker); + // We use best-first to pick the `start_node` and then perform a depth-first search // from this node (i.e., a plunge). It can only backtrack to a sibling node. 
// Unexplored nodes in the subtree are inserted back into the global heap. - void plunge_with(bfs_worker_t* worker); + void plunge_with(bfs_worker_t* worker, mip_node_t* start_node); // Perform a deep dive in the subtree determined by the `start_node` in order // to find integer feasible solutions. diff --git a/cpp/src/branch_and_bound/deterministic_workers.hpp b/cpp/src/branch_and_bound/deterministic_workers.hpp index a5c3769126..482c8f6c04 100644 --- a/cpp/src/branch_and_bound/deterministic_workers.hpp +++ b/cpp/src/branch_and_bound/deterministic_workers.hpp @@ -193,8 +193,8 @@ class deterministic_bfs_worker_t plunge_stack.pop_front(); return node; } - auto node_opt = backlog.pop(); - return node_opt.has_value() ? node_opt.value() : nullptr; + + return !backlog.empty() ? backlog.pop() : nullptr; } size_t queue_size() const diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 09d030c96e..6b9d551ec4 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -44,10 +44,8 @@ class heap_t { std::push_heap(buffer.begin(), buffer.end(), comp); } - std::optional pop() + T pop() { - if (buffer.empty()) return std::nullopt; - std::pop_heap(buffer.begin(), buffer.end(), comp); T node = std::move(buffer.back()); buffer.pop_back(); @@ -109,58 +107,59 @@ class node_queue_t { public: void push(mip_node_t* new_node) { - std::lock_guard lock(mutex); + std::lock_guard lock(mutex); auto entry = std::make_shared(new_node); best_first_heap.push(entry); diving_heap.push(entry); } - std::optional*> pop_best_first() + // This **MUST** only be called after acquiring the mutex with `lock()`. Remember to call + // `unlock()` afterward. 
+ mip_node_t* pop_best_first() { - std::lock_guard lock(mutex); + if (best_first_heap.empty()) { return nullptr; } auto entry = best_first_heap.pop(); - - if (entry.has_value()) { return std::exchange(entry.value()->node, nullptr); } - - return std::nullopt; + return std::exchange(entry->node, nullptr); } - std::optional*> pop_diving() + // This **MUST** only be called after acquiring the mutex with `lock()`. Remember to call + // `unlock()` afterward. + mip_node_t* pop_diving() { - std::lock_guard lock(mutex); - while (!diving_heap.empty()) { - auto entry = diving_heap.pop(); - - if (entry.has_value()) { - if (auto node_ptr = entry.value()->node; node_ptr != nullptr) { return node_ptr; } - } + auto entry = diving_heap.pop(); + auto node_ptr = entry->node; + if (node_ptr != nullptr) { return node_ptr; } } - return std::nullopt; + return nullptr; } + void lock() { mutex.lock(); } + void unlock() { mutex.unlock(); } + i_t diving_queue_size() { - std::lock_guard lock(mutex); + std::lock_guard lock(mutex); return diving_heap.size(); } i_t best_first_queue_size() { - std::lock_guard lock(mutex); + std::lock_guard lock(mutex); return best_first_heap.size(); } f_t get_lower_bound() { - std::lock_guard lock(mutex); + std::lock_guard lock(mutex); return best_first_heap.empty() ? inf : best_first_heap.top()->lower_bound; } + // This **MUST** only be called after acquiring the mutex with `lock()`. Remember to call + // `unlock()` afterward. mip_node_t* bfs_top() { - std::lock_guard lock(mutex); return best_first_heap.empty() ? 
nullptr : best_first_heap.top()->node; } }; diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 65fe5c32dd..0958515bc2 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -18,6 +18,8 @@ #include #include +#include "node_queue.hpp" + namespace cuopt::linear_programming::dual_simplex { template @@ -41,6 +43,8 @@ class branch_and_bound_worker_t { omp_atomic_t search_strategy; omp_atomic_t is_active; omp_atomic_t lower_bound; + omp_atomic_t node_depth; + omp_atomic_t integer_infeasible; lp_problem_t leaf_problem; lp_solution_t leaf_solution; @@ -55,7 +59,6 @@ class branch_and_bound_worker_t { std::vector start_lower; std::vector start_upper; - mip_node_t* start_node; pcgenerator_t rng; @@ -67,7 +70,7 @@ class branch_and_bound_worker_t { const csr_matrix_t& Arow, const std::vector& var_type, const simplex_solver_settings_t& settings, - const uint64_t rng_offset = 0) + uint64_t rng_offset = 0) : worker_id(worker_id), search_strategy(BEST_FIRST), is_active(false), @@ -79,7 +82,6 @@ class branch_and_bound_worker_t { nonbasic_list(), node_presolver(leaf_problem, Arow, {}, var_type), bounds_changed(original_lp.num_cols, false), - start_node(nullptr), rng(settings.random_seed + pcgenerator_t::default_seed + rng_offset + worker_id, pcgenerator_t::default_stream ^ (worker_id + rng_offset)) { @@ -112,18 +114,38 @@ template class bfs_worker_t : public branch_and_bound_worker_t { public: using Base = branch_and_bound_worker_t; - using Base::Base; - - // Set the `start_node` for best-first search. 
- void init(mip_node_t* node, const lp_problem_t& original_lp) + bfs_worker_t(i_t worker_id, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings, + uint64_t rng_offset = 0) + : Base(worker_id, original_lp, Arow, var_type, settings, rng_offset) { - Base::start_node = node; Base::start_lower = original_lp.lower; Base::start_upper = original_lp.upper; Base::search_strategy = BEST_FIRST; - Base::lower_bound = node->lower_bound; - Base::is_active = true; } + + f_t get_lower_bound() + { + f_t lower_bound = std::numeric_limits::infinity(); + + if (Base::is_active) { + if (node_queue.best_first_queue_size() > 0) { + node_queue.lock(); + mip_node_t* node = node_queue.bfs_top(); + if (node) { lower_bound = node->lower_bound; } + node_queue.unlock(); + } + + lower_bound = std::min(lower_bound, Base::lower_bound.load()); + } + + return lower_bound; + } + + node_queue_t node_queue; }; template @@ -140,8 +162,7 @@ class diving_worker_t : public branch_and_bound_worker_t { const lp_problem_t& original_lp, const simplex_solver_settings_t& settings) { - internal_node = node->detach_copy(); - Base::start_node = &internal_node; + start_node = node->detach_copy(); Base::start_lower = original_lp.lower; Base::start_upper = original_lp.upper; Base::search_strategy = type; @@ -154,14 +175,12 @@ class diving_worker_t : public branch_and_bound_worker_t { settings, Base::bounds_changed, Base::start_lower, Base::start_upper); } - protected: - // For diving, we need to store the full node instead of - // of just a pointer, since it is not stored in the tree anymore. - // To keep the same interface across all worker types, - // this will be used as a temporary storage and - // will be pointed by `start_node`. - // For exploration, this will not be used. - mip_node_t internal_node; + f_t get_lower_bound() + { + return Base::is_active ? 
Base::lower_bound.load() : std::numeric_limits::infinity(); + } + + mip_node_t start_node; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index 8397cd6174..693a1879ff 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -35,34 +35,22 @@ class worker_pool_t { is_initialized = true; } - // Here, we are assuming that the scheduler is the only - // thread that can retrieve/pop an idle worker. - WorkerType* get_idle_worker() + WorkerType* pop_idle_worker() { - std::lock_guard lock(mutex_); + std::lock_guard lock(mutex_); if (idle_workers_.empty()) { return nullptr; } else { i_t idx = idle_workers_.front(); - return workers_[idx].get(); - } - } - - // Here, we are assuming that the scheduler is the only - // thread that can retrieve/pop an idle worker. - void pop_idle_worker() - { - std::lock_guard lock(mutex_); - if (!idle_workers_.empty()) { idle_workers_.pop_front(); num_idle_workers_--; + return workers_[idx].get(); } } - void return_worker_to_pool(WorkerType* worker) { worker->is_active = false; - std::lock_guard lock(mutex_); + std::lock_guard lock(mutex_); idle_workers_.push_back(worker->worker_id); num_idle_workers_++; } @@ -73,15 +61,25 @@ class worker_pool_t { if (is_initialized) { for (i_t i = 0; i < workers_.size(); ++i) { - if (workers_[i]->is_active) { - lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); - } + lower_bound = std::min(workers_[i]->get_lower_bound(), lower_bound); } } return lower_bound; } + WorkerType* get_worker(i_t id) { return workers_[id].get(); } + + std::vector get_active_workers() + { + std::vector active_workers; + for (i_t i = 0; i < workers_.size(); ++i) { + if (workers_[i]->is_active) { active_workers.push_back(workers_[i].get()); } + } + + return active_workers; + } + i_t num_idle_workers() const { return num_idle_workers_; } i_t num_workers() const { 
return workers_.size(); } From 6a391875dfba24e4ffcaf09b9bb422ed066ac149 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Thu, 9 Apr 2026 11:21:53 +0200 Subject: [PATCH 14/53] implemented a lock-free stack to track the idle workers. fix potential crash in work-stealing Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 44 ++++++++++++------- cpp/src/branch_and_bound/branch_and_bound.hpp | 2 +- cpp/src/branch_and_bound/node_queue.hpp | 7 --- cpp/src/branch_and_bound/worker.hpp | 9 +--- cpp/src/branch_and_bound/worker_pool.hpp | 27 ++++-------- cpp/src/utilities/omp_helpers.hpp | 24 ++++++---- 6 files changed, 55 insertions(+), 58 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 45cf491215..a94503333c 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1569,9 +1569,14 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t worker->node_queue.lock(); mip_node_t* start_node = worker->node_queue.pop_best_first(); - worker->lower_bound = start_node->lower_bound; - worker->node_depth = start_node->depth; - worker->integer_infeasible = start_node->integer_infeasible; + if (!start_node) { + worker->node_queue.unlock(); + continue; + } + + worker->lower_bound = start_node->lower_bound; + worker->node_depth = start_node->depth; + worker->integer_infeasible = start_node->integer_infeasible; worker->node_queue.unlock(); if (get_cutoff() < start_node->lower_bound) { @@ -1685,7 +1690,7 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) } template -void branch_and_bound_t::start_bfs_worker(mip_node_t* start_node) +void branch_and_bound_t::start_new_bfs_worker(mip_node_t* start_node) { bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); if (!idle_worker) { return; } @@ -1730,11 +1735,13 @@ void branch_and_bound_t::run_scheduler() f_t abs_gap = 
compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - start_bfs_worker(search_tree_.root.get_up_child()); - start_bfs_worker(search_tree_.root.get_down_child()); + start_new_bfs_worker(search_tree_.root.get_up_child()); + start_new_bfs_worker(search_tree_.root.get_down_child()); while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && active_workers_per_strategy_[0] > 0) { + bool launched_any_task = false; + repair_heuristic_solutions(); // If the guided diving was disabled previously due to the lack of an incumbent solution, @@ -1787,10 +1794,11 @@ void branch_and_bound_t::run_scheduler() for (int k = 0; k < active_workers.size() && bfs_worker_pool_.num_idle_workers() > 0; ++k) { bfs_worker_t* worker = active_workers[k]; if (worker->is_active && worker->node_queue.best_first_queue_size() > 1) { - worker->node_queue.lock(); + std::lock_guard lock(worker->node_queue); mip_node_t* node = worker->node_queue.pop_best_first(); - start_bfs_worker(node); - worker->node_queue.unlock(); + if (!node) { continue; } + start_new_bfs_worker(node); + launched_any_task = true; } } @@ -1831,8 +1839,8 @@ void branch_and_bound_t::run_scheduler() continue; } - // Remove the worker from the idle list. active_workers_per_strategy_[strategy]++; + launched_any_task = true; #pragma omp task affinity(diving_worker) dive_with(diving_worker); @@ -1849,10 +1857,10 @@ void branch_and_bound_t::run_scheduler() break; } - // // If no new task was launched in this iteration, suspend temporarily the - // // execution of the scheduler. As of 8/Jan/2026, GCC does not - // // implement taskyield, but LLVM does. - // if (!launched_any_task) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + // If no new task was launched in this iteration, suspend temporarily the + // execution of the scheduler. 
As of 8/Jan/2026, GCC does not + // implement taskyield, but LLVM does. + if (!launched_any_task) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } } } @@ -1879,8 +1887,8 @@ void branch_and_bound_t::single_threaded_solve() if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && time_since_last_log >= 1) || (time_since_last_log > 30) || now > settings_.time_limit) { - i_t depth = node_queue.bfs_top()->depth; - i_t int_infeas = node_queue.bfs_top()->integer_infeasible; + i_t depth = worker->node_depth; + i_t int_infeas = worker->integer_infeasible; report(' ', upper_bound_, lower_bound, depth, int_infeas); exploration_stats_.last_log = tic(); exploration_stats_.nodes_since_last_log = 0; @@ -2691,6 +2699,10 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut lower_bound = deterministic_compute_lower_bound(); solver_status_ = deterministic_global_termination_status_; } else { + // for (int i = 0; i < bfs_worker_pool_.num_workers(); ++i) { + // + // } + // if (node_queue_.best_first_queue_size() > 0) { // // We need to clear the queue and use the info in the search tree for the lower bound // while (node_queue_.best_first_queue_size() > 0) { diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index fecd295531..aef78c9f6d 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -290,7 +290,7 @@ class branch_and_bound_t { // Repairs low-quality solutions from the heuristics, if it is applicable. 
void repair_heuristic_solutions(); - void start_bfs_worker(mip_node_t* start_node); + void start_new_bfs_worker(mip_node_t* start_node); void best_first_search_with(bfs_worker_t* worker); diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 6b9d551ec4..5b8669c6e5 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -155,13 +155,6 @@ class node_queue_t { std::lock_guard lock(mutex); return best_first_heap.empty() ? inf : best_first_heap.top()->lower_bound; } - - // This **MUST** only be called after acquiring the mutex with `lock()`. Remember to call - // `unlock()` afterward. - mip_node_t* bfs_top() - { - return best_first_heap.empty() ? nullptr : best_first_heap.top()->node; - } }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 0958515bc2..caa4e5dcfc 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -132,14 +132,7 @@ class bfs_worker_t : public branch_and_bound_worker_t { f_t lower_bound = std::numeric_limits::infinity(); if (Base::is_active) { - if (node_queue.best_first_queue_size() > 0) { - node_queue.lock(); - mip_node_t* node = node_queue.bfs_top(); - if (node) { lower_bound = node->lower_bound; } - node_queue.unlock(); - } - - lower_bound = std::min(lower_bound, Base::lower_bound.load()); + lower_bound = std::min(node_queue.get_lower_bound(), Base::lower_bound.load()); } return lower_bound; diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index 693a1879ff..ab3385c564 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -8,6 +8,7 @@ #pragma once #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -25,11 +26,11 @@ class worker_pool_t { const uint64_t rng_offset = 0) { workers_.resize(num_workers); - 
num_idle_workers_ = num_workers; + idle_stack_.init(num_workers); for (i_t i = 0; i < num_workers; ++i) { workers_[i] = std::make_unique(i, original_lp, Arow, var_type, settings, rng_offset); - idle_workers_.push_front(i); + idle_stack_.push(i); } is_initialized = true; @@ -37,22 +38,14 @@ class worker_pool_t { WorkerType* pop_idle_worker() { - std::lock_guard lock(mutex_); - if (idle_workers_.empty()) { - return nullptr; - } else { - i_t idx = idle_workers_.front(); - idle_workers_.pop_front(); - num_idle_workers_--; - return workers_[idx].get(); - } + std::optional idx = idle_stack_.pop(); + return idx ? workers_[*idx].get() : nullptr; } + void return_worker_to_pool(WorkerType* worker) { worker->is_active = false; - std::lock_guard lock(mutex_); - idle_workers_.push_back(worker->worker_id); - num_idle_workers_++; + idle_stack_.push(worker->worker_id); } f_t get_lower_bound() @@ -80,7 +73,7 @@ class worker_pool_t { return active_workers; } - i_t num_idle_workers() const { return num_idle_workers_; } + i_t num_idle_workers() const { return idle_stack_.size(); } i_t num_workers() const { return workers_.size(); } private: @@ -88,9 +81,7 @@ class worker_pool_t { std::vector> workers_; bool is_initialized = false; - omp_mutex_t mutex_; - std::deque idle_workers_; - omp_atomic_t num_idle_workers_; + lock_free_index_stack_t idle_stack_; }; template diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index 8890a7487a..43e93f714d 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -134,17 +134,11 @@ class omp_atomic_t { private: T val; -#ifndef __NVCC__ friend double fetch_min(omp_atomic_t& atomic_var, double other); friend double fetch_max(omp_atomic_t& atomic_var, double other); -#endif + friend bool compare_exchange(omp_atomic_t& atomic_var, int& expected, int desired); }; -// Atomic CAS are only supported in OpenMP v5.1 -// (gcc 12+ or clang 14+), however, nvcc (or the host compiler) cannot -// parse 
it correctly yet -#ifndef __NVCC__ - // Free non-template functions are necessary because of a clang 20 bug // when omp atomic compare is used within a templated context. // see https://github.com/llvm/llvm-project/issues/127466 @@ -169,8 +163,22 @@ inline double fetch_max(omp_atomic_t& atomic_var, double other) } return old; } -#endif +// CAS: atomically sets `atomic_var` to `desired` if it equals `expected`. +// On failure, loads the current value into expected. +// Returns true if the exchange happened. +inline bool compare_exchange(omp_atomic_t& atomic_var, int& expected, int desired) +{ + int old; +#pragma omp atomic compare capture + { + old = atomic_var.val; + if (atomic_var.val == expected) { atomic_var.val = desired; } + } + bool success = (old == expected); + if (!success) { expected = old; } + return success; +} #endif } // namespace cuopt From dec671cccc2bd942ca7c5437b1b43af565f86a9d Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 10 Apr 2026 10:53:24 +0200 Subject: [PATCH 15/53] fixed lower bound calculation at end of the B&B. reverted to locking queue for now. refactoring. Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 177 +++++++++--------- cpp/src/branch_and_bound/branch_and_bound.hpp | 5 +- cpp/src/branch_and_bound/mip_node.hpp | 36 ++-- cpp/src/branch_and_bound/node_queue.hpp | 72 +++---- cpp/src/branch_and_bound/worker.hpp | 22 ++- cpp/src/branch_and_bound/worker_pool.hpp | 38 ++-- 6 files changed, 180 insertions(+), 170 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index a94503333c..1ba3847cc5 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1486,13 +1486,15 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, if (lp_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; break; - } else if (lp_status == dual::status_t::CONCURRENT_LIMIT) { + } + + if (lp_status == dual::status_t::CONCURRENT_LIMIT) { stack.push_front(node_ptr); break; - } else if (lp_status == dual::status_t::ITERATION_LIMIT) { - break; } + if (lp_status == dual::status_t::ITERATION_LIMIT) { break; } + ++exploration_stats_.nodes_since_last_log; ++exploration_stats_.nodes_explored; --exploration_stats_.nodes_unexplored; @@ -1650,11 +1652,9 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) if (lp_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; break; - } else if (lp_status == dual::status_t::CONCURRENT_LIMIT) { - break; - } else if (lp_status == dual::status_t::ITERATION_LIMIT) { - break; } + if (lp_status == dual::status_t::CONCURRENT_LIMIT) { break; } + if (lp_status == dual::status_t::ITERATION_LIMIT) { break; } ++dive_stats.nodes_explored; @@ -1673,7 +1673,7 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) } } - // Remove nodes that we no longer can backtrack to (i.e., from the current node, we can only + // Remove nodes that we can no longer backtrack to (i.e., from the current node, we can only // backtrack to 
a node that is has a depth of at most 5 levels lower than the current node). if (stack.size() > 1 && stack.front()->depth - stack.back()->depth > diving_backtrack_limit) { stack.pop_back(); @@ -1690,18 +1690,53 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) } template -void branch_and_bound_t::start_new_bfs_worker(mip_node_t* start_node) +bool branch_and_bound_t::launch_bfs_worker(mip_node_t* start_node) { bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); - if (!idle_worker) { return; } + if (!idle_worker) { return false; } - idle_worker->node_queue.push(start_node); - idle_worker->lower_bound = start_node->lower_bound; - idle_worker->is_active = true; + idle_worker->init(start_node); active_workers_per_strategy_[BEST_FIRST]++; #pragma omp task affinity(idle_worker) best_first_search_with(idle_worker); + + return true; +} + +template +bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* bfs_worker, + search_strategy_t diving_type, + i_t min_node_depth) +{ + // Get an idle worker. 
+ diving_worker_t* diving_worker = diving_worker_pool_.pop_idle_worker(); + if (diving_worker == nullptr) { return false; } + + bfs_worker->node_queue.lock(); + mip_node_t* start_node = bfs_worker->node_queue.pop_diving(); + + if (!start_node || get_cutoff() < start_node->lower_bound || start_node->depth < min_node_depth) { + diving_worker_pool_.return_worker_to_pool(diving_worker); + bfs_worker->node_queue.unlock(); + return false; + } + + diving_worker->init(start_node, original_lp_, diving_type); + bfs_worker->node_queue.unlock(); + + bool is_feasible = diving_worker->presolve_start_bounds(settings_); + if (!is_feasible) { + diving_worker_pool_.return_worker_to_pool(diving_worker); + return false; + } + + active_workers_per_strategy_[diving_type]++; + +#pragma omp task affinity(diving_worker) + dive_with(diving_worker); + + return true; } template @@ -1735,8 +1770,8 @@ void branch_and_bound_t::run_scheduler() f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - start_new_bfs_worker(search_tree_.root.get_up_child()); - start_new_bfs_worker(search_tree_.root.get_down_child()); + launch_bfs_worker(search_tree_.root.get_up_child()); + launch_bfs_worker(search_tree_.root.get_down_child()); while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && active_workers_per_strategy_[0] > 0) { @@ -1763,8 +1798,7 @@ void branch_and_bound_t::run_scheduler() } } - std::vector*> active_workers = bfs_worker_pool_.get_active_workers(); - f_t now = toc(exploration_stats_.start_time); + f_t now = toc(exploration_stats_.start_time); f_t time_since_last_log = exploration_stats_.last_log == 0 ? 
1.0 : toc(exploration_stats_.last_log); i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; @@ -1774,7 +1808,8 @@ void branch_and_bound_t::run_scheduler() (time_since_last_log > 30) || now > settings_.time_limit) { i_t node_depth = std::numeric_limits::max(); i_t int_infeas = 0; - for (auto* worker : active_workers) { + for (int k = 0; k < num_bfs_workers; ++k) { + bfs_worker_t* worker = bfs_worker_pool_.get_worker(k); if (worker->is_active) { node_depth = std::min(node_depth, worker->node_depth.load()); int_infeas = std::max(int_infeas, worker->integer_infeasible.load()); @@ -1791,59 +1826,32 @@ void branch_and_bound_t::run_scheduler() break; } - for (int k = 0; k < active_workers.size() && bfs_worker_pool_.num_idle_workers() > 0; ++k) { - bfs_worker_t* worker = active_workers[k]; + for (i_t k = 0; k < num_bfs_workers && bfs_worker_pool_.num_idle_workers() > 0; ++k) { + bfs_worker_t* worker = bfs_worker_pool_.get_worker(k); if (worker->is_active && worker->node_queue.best_first_queue_size() > 1) { std::lock_guard lock(worker->node_queue); mip_node_t* node = worker->node_queue.pop_best_first(); if (!node) { continue; } - start_new_bfs_worker(node); + if (!launch_bfs_worker(node)) { break; } launched_any_task = true; } } for (int i = 1; i < strategies.size(); ++i) { - auto strategy = strategies[i]; - - for (int k = 0; k < active_workers.size(); ++k) { - if (active_workers_per_strategy_[strategy] > max_num_workers_per_type[strategy]) { break; } - - bfs_worker_t* bfs_worker = active_workers[k]; + auto diving_type = strategies[i]; + i_t num_new_tasks = + max_num_workers_per_type[diving_type] - active_workers_per_strategy_[diving_type]; + + while (num_new_tasks > 0 && diving_worker_pool_.num_idle_workers() > 0) { + --num_new_tasks; + i_t k = rng_.uniform(0, num_bfs_workers); + bfs_worker_t* bfs_worker = bfs_worker_pool_.get_worker(k); if (!bfs_worker->is_active) { continue; } + if (bfs_worker->node_queue.diving_queue_size() == 0) { continue; } - // Get 
an idle worker. - diving_worker_t* diving_worker = diving_worker_pool_.pop_idle_worker(); - if (diving_worker == nullptr) { continue; } - - bfs_worker->node_queue.lock(); - mip_node_t* start_node = bfs_worker->node_queue.pop_diving(); - - if (!start_node) { - diving_worker_pool_.return_worker_to_pool(diving_worker); - bfs_worker->node_queue.unlock(); - continue; - } - - if (get_cutoff() < start_node->lower_bound || - start_node->depth < diving_settings.min_node_depth) { - diving_worker_pool_.return_worker_to_pool(diving_worker); - bfs_worker->node_queue.unlock(); - continue; + if (launch_diving_worker(bfs_worker, diving_type, diving_settings.min_node_depth)) { + launched_any_task = true; } - - bool is_feasible = diving_worker->init(start_node, strategy, original_lp_, settings_); - bfs_worker->node_queue.unlock(); - - if (!is_feasible) { - diving_worker_pool_.return_worker_to_pool(diving_worker); - continue; - } - - active_workers_per_strategy_[strategy]++; - launched_any_task = true; - -#pragma omp task affinity(diving_worker) - dive_with(diving_worker); } } @@ -2699,35 +2707,32 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut lower_bound = deterministic_compute_lower_bound(); solver_status_ = deterministic_global_termination_status_; } else { - // for (int i = 0; i < bfs_worker_pool_.num_workers(); ++i) { - // - // } + lower_bound = std::numeric_limits::infinity(); - // if (node_queue_.best_first_queue_size() > 0) { - // // We need to clear the queue and use the info in the search tree for the lower bound - // while (node_queue_.best_first_queue_size() > 0) { - // std::optional*> start_node = node_queue_.pop_best_first(); - // - // if (!start_node.has_value()) { continue; } - // if (get_cutoff() < start_node.value()->lower_bound) { - // // This node was put on the heap earlier but its lower bound is now greater than the - // // current upper bound - // search_tree_.graphviz_node( - // settings_.log, start_node.value(), "cutoff", 
start_node.value()->lower_bound); - // search_tree_.update(start_node.value(), node_status_t::FATHOMED); - // continue; - // } else { - // node_queue_.push( - // start_node.value()); // Needed to ensure we don't lose the correct lower bound - // break; - // } - // } - // lower_bound = node_queue_.best_first_queue_size() > 0 ? node_queue_.get_lower_bound() - // : search_tree_.root.lower_bound; - // } else { - lower_bound = search_tree_.root.lower_bound; - // } + for (int i = 0; i < bfs_worker_pool_.num_workers(); ++i) { + bfs_worker_t* worker = bfs_worker_pool_.get_worker(i); + + // We need to clear the queue and use the info in the search tree for the lower bound + while (worker->node_queue.best_first_queue_size() > 0) { + mip_node_t* start_node = worker->node_queue.pop_best_first(); + + if (get_cutoff() < start_node->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree_.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); + search_tree_.update(start_node, node_status_t::FATHOMED); + } else { + // Needed to ensure we don't lose the correct lower bound + worker->node_queue.push(start_node); + lower_bound = std::min(lower_bound, worker->node_queue.get_lower_bound()); + break; + } + } + } + + if (!std::isfinite(lower_bound)) { lower_bound = search_tree_.root.lower_bound; } } + set_final_solution(solution, lower_bound); return solver_status_; } diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index aef78c9f6d..d28a6fc0da 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -290,7 +290,10 @@ class branch_and_bound_t { // Repairs low-quality solutions from the heuristics, if it is applicable. 
void repair_heuristic_solutions(); - void start_new_bfs_worker(mip_node_t* start_node); + bool launch_bfs_worker(mip_node_t* start_node); + bool launch_diving_worker(bfs_worker_t* bfs_worker, + std::vector::value_type diving_type, + i_t min_node_depth); void best_first_search_with(bfs_worker_t* worker); diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp index cce23c3bd7..61228b1a8d 100644 --- a/cpp/src/branch_and_bound/mip_node.hpp +++ b/cpp/src/branch_and_bound/mip_node.hpp @@ -40,24 +40,6 @@ inline bool inactive_status(node_status_t status) template class mip_node_t { public: - ~mip_node_t() - { - // Iterative teardown to avoid stack overflow on deep trees. - // Detach all descendants breadth-first, then destroy them as leaves. - std::vector> nodes; - for (auto& c : children) { - if (c) { nodes.push_back(std::move(c)); } - } - // nodes.size() grows so that this loop only terminates when only leaves remain - for (size_t i = 0; i < nodes.size(); ++i) { - for (auto& c : nodes[i]->children) { - if (c) { nodes.push_back(std::move(c)); } - } - } - - // scope-exit ensure destruction of all detached leaves - } - mip_node_t(mip_node_t&&) = default; mip_node_t& operator=(mip_node_t&&) = default; @@ -123,6 +105,22 @@ class mip_node_t { children[1] = nullptr; } + ~mip_node_t() + { + // Iterative teardown to avoid stack overflow on deep trees. + // Detach all descendants breadth-first, then destroy them as leaves. 
+ std::vector> nodes; + for (auto& c : children) { + if (c) { nodes.push_back(std::move(c)); } + } + // nodes.size() grows so that this loop only terminates when only leaves remain + for (size_t i = 0; i < nodes.size(); ++i) { + for (auto& c : nodes[i]->children) { + if (c) { nodes.push_back(std::move(c)); } + } + } + } + void get_variable_bounds(std::vector& lower, std::vector& upper, std::vector& bounds_changed) const @@ -256,7 +254,7 @@ class mip_node_t { // This method creates a copy of the current node // with its parent set to `nullptr` // This detaches the node from the tree. - mip_node_t detach_copy() const + mip_node_t detach_copy() const { mip_node_t copy; copy.lower_bound = lower_bound; diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 5b8669c6e5..856d3af1c5 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -68,42 +68,6 @@ class heap_t { // A queue storing the nodes waiting to be explored/dived from. template class node_queue_t { - private: - struct heap_entry_t { - mip_node_t* node = nullptr; - f_t lower_bound = -inf; - f_t score = inf; - - heap_entry_t(mip_node_t* new_node) - : node(new_node), lower_bound(new_node->lower_bound), score(new_node->objective_estimate) - { - } - }; - - // Comparision function for ordering the nodes based on their lower bound with - // lowest one being explored first. - struct lower_bound_comp { - bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) - { - // `a` will be placed after `b` - return a->lower_bound > b->lower_bound; - } - }; - - // Comparision function for ordering the nodes based on some score (currently the pseudocost - // estimate) with the lowest being explored first. 
- struct score_comp { - bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) - { - // `a` will be placed after `b` - return a->score > b->score; - } - }; - - heap_t, lower_bound_comp> best_first_heap; - heap_t, score_comp> diving_heap; - omp_mutex_t mutex; - public: void push(mip_node_t* new_node) { @@ -155,6 +119,42 @@ class node_queue_t { std::lock_guard lock(mutex); return best_first_heap.empty() ? inf : best_first_heap.top()->lower_bound; } + + private: + struct heap_entry_t { + mip_node_t* node = nullptr; + f_t lower_bound = -inf; + f_t score = inf; + + heap_entry_t(mip_node_t* new_node) + : node(new_node), lower_bound(new_node->lower_bound), score(new_node->objective_estimate) + { + } + }; + + // Comparision function for ordering the nodes based on their lower bound with + // lowest one being explored first. + struct lower_bound_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->lower_bound > b->lower_bound; + } + }; + + // Comparision function for ordering the nodes based on some score (currently the pseudocost + // estimate) with the lowest being explored first. 
+ struct score_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->score > b->score; + } + }; + + heap_t, lower_bound_comp> best_first_heap; + heap_t, score_comp> diving_heap; + omp_mutex_t mutex; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index caa4e5dcfc..acded68f9d 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -138,6 +138,13 @@ class bfs_worker_t : public branch_and_bound_worker_t { return lower_bound; } + void init(mip_node_t* node) + { + node_queue.push(node); + Base::lower_bound = node->lower_bound; + Base::is_active = true; + } + node_queue_t node_queue; }; @@ -147,23 +154,22 @@ class diving_worker_t : public branch_and_bound_worker_t { using Base = branch_and_bound_worker_t; using Base::Base; - // Initialize the worker for diving, setting the `start_node`, `start_lower` and - // `start_upper`. Returns `true` if the starting node is feasible via - // bounds propagation. 
- bool init(mip_node_t* node, - search_strategy_t type, + void init(const mip_node_t* node, const lp_problem_t& original_lp, - const simplex_solver_settings_t& settings) + search_strategy_t strategy) { start_node = node->detach_copy(); Base::start_lower = original_lp.lower; Base::start_upper = original_lp.upper; - Base::search_strategy = type; + Base::search_strategy = strategy; Base::lower_bound = node->lower_bound; Base::is_active = true; - std::fill(Base::bounds_changed.begin(), Base::bounds_changed.end(), false); node->get_variable_bounds(Base::start_lower, Base::start_upper, Base::bounds_changed); + } + + bool presolve_start_bounds(const simplex_solver_settings_t& settings) + { return Base::node_presolver.bounds_strengthening( settings, Base::bounds_changed, Base::start_lower, Base::start_upper); } diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index ab3385c564..eabdc0beaa 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -8,7 +8,6 @@ #pragma once #include -#include namespace cuopt::linear_programming::dual_simplex { @@ -26,11 +25,11 @@ class worker_pool_t { const uint64_t rng_offset = 0) { workers_.resize(num_workers); - idle_stack_.init(num_workers); + num_idle_workers_ = num_workers; for (i_t i = 0; i < num_workers; ++i) { workers_[i] = std::make_unique(i, original_lp, Arow, var_type, settings, rng_offset); - idle_stack_.push(i); + idle_workers_.push_front(i); } is_initialized = true; @@ -38,14 +37,22 @@ class worker_pool_t { WorkerType* pop_idle_worker() { - std::optional idx = idle_stack_.pop(); - return idx ? 
workers_[*idx].get() : nullptr; + std::lock_guard lock(mutex_); + if (idle_workers_.empty()) { + return nullptr; + } else { + i_t idx = idle_workers_.front(); + idle_workers_.pop_front(); + num_idle_workers_--; + return workers_[idx].get(); + } } - void return_worker_to_pool(WorkerType* worker) { worker->is_active = false; - idle_stack_.push(worker->worker_id); + std::lock_guard lock(mutex_); + idle_workers_.push_back(worker->worker_id); + num_idle_workers_++; } f_t get_lower_bound() @@ -63,25 +70,16 @@ class worker_pool_t { WorkerType* get_worker(i_t id) { return workers_[id].get(); } - std::vector get_active_workers() - { - std::vector active_workers; - for (i_t i = 0; i < workers_.size(); ++i) { - if (workers_[i]->is_active) { active_workers.push_back(workers_[i].get()); } - } - - return active_workers; - } - - i_t num_idle_workers() const { return idle_stack_.size(); } + i_t num_idle_workers() const { return num_idle_workers_; } i_t num_workers() const { return workers_.size(); } private: - // Worker pool std::vector> workers_; bool is_initialized = false; - lock_free_index_stack_t idle_stack_; + omp_mutex_t mutex_; + std::deque idle_workers_; + omp_atomic_t num_idle_workers_; }; template From 1b3a2824b1ad42a666300c203176e8c4fdb291e6 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 10 Apr 2026 14:49:59 +0200 Subject: [PATCH 16/53] correctly handles the node in the stack when the solver stops if they are present Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 25 +++++++++++-------- cpp/src/utilities/omp_helpers.hpp | 4 +-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 1ba3847cc5..85f11c48bf 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1473,11 +1473,13 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, if (toc(exploration_stats_.start_time) > settings_.time_limit) { solver_status_ = mip_status_t::TIME_LIMIT; + stack.push_front(node_ptr); break; } if (exploration_stats_.nodes_explored >= settings_.node_limit) { solver_status_ = mip_status_t::NODE_LIMIT; + stack.push_front(node_ptr); break; } @@ -1485,6 +1487,7 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, if (lp_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; + stack.push_front(node_ptr); break; } @@ -1493,7 +1496,10 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, break; } - if (lp_status == dual::status_t::ITERATION_LIMIT) { break; } + if (lp_status == dual::status_t::ITERATION_LIMIT) { + stack.push_front(node_ptr); + break; + } ++exploration_stats_.nodes_since_last_log; ++exploration_stats_.nodes_explored; @@ -1543,16 +1549,13 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); } - if (stack.size() > 0 && - (rel_gap <= settings_.relative_mip_gap_tol || abs_gap <= settings_.absolute_mip_gap_tol)) { - // If the solver converged according to the gap rules, but we still have nodes to explore - // in the stack, then we should add all the pending nodes back to the heap so the lower - // bound of the solver is set to the correct value. 
- while (!stack.empty()) { - auto node = stack.front(); - stack.pop_front(); - worker->node_queue.push(node); - } + // If the solver was forced to stop, but we still have nodes to explore + // in the stack, then we should add all the pending nodes back to the heap so the lower + // bound of the solver is set to the correct value. + while (!stack.empty()) { + auto node = stack.front(); + stack.pop_front(); + worker->node_queue.push(node); } } diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index 43e93f714d..09e9068e3f 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -165,12 +165,12 @@ inline double fetch_max(omp_atomic_t& atomic_var, double other) } // CAS: atomically sets `atomic_var` to `desired` if it equals `expected`. -// On failure, loads the current value into expected. +// On failure, loads the current value into `expected`. // Returns true if the exchange happened. inline bool compare_exchange(omp_atomic_t& atomic_var, int& expected, int desired) { int old; -#pragma omp atomic compare capture +#pragma omp atomic compare capture seq_cst { old = atomic_var.val; if (atomic_var.val == expected) { atomic_var.val = desired; } From e108a546d97ac19472127dfa632039a6676f6297 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 10 Apr 2026 16:02:25 +0200 Subject: [PATCH 17/53] added atomic in node queue to track size and lower bound without a lock. Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/node_queue.hpp | 88 +++++++++++++++---------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 856d3af1c5..28acf39f68 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -29,12 +28,14 @@ class heap_t { { buffer.push_back(node); std::push_heap(buffer.begin(), buffer.end(), comp); + ++num_entries_; } void push(T&& node) { buffer.push_back(std::move(node)); std::push_heap(buffer.begin(), buffer.end(), comp); + ++num_entries_; } template @@ -42,6 +43,7 @@ class heap_t { { buffer.emplace_back(std::forward(args)...); std::push_heap(buffer.begin(), buffer.end(), comp); + ++num_entries_; } T pop() @@ -49,82 +51,97 @@ class heap_t { std::pop_heap(buffer.begin(), buffer.end(), comp); T node = std::move(buffer.back()); buffer.pop_back(); + --num_entries_; return node; } - size_t size() const { return buffer.size(); } + size_t size() const { return num_entries_; } T& top() { return buffer.front(); } - void clear() { buffer.clear(); } - bool empty() const { return buffer.empty(); } + + void clear() + { + buffer.clear(); + num_entries_ = 0; + } + + bool empty() const { return num_entries_ == 0; } // Read-only access to underlying buffer for iteration without modification const std::vector& data() const { return buffer; } private: std::vector buffer; + omp_atomic_t num_entries_{0}; Comp comp; }; // A queue storing the nodes waiting to be explored/dived from. +// +// Both heaps share ownership of heap_entry_t via shared_ptr. This keeps the entry alive even +// after the mip_node_t it points to has been freed, so pop_diving() can safely check +// entry->node without a dangling dereference. 
+// +// Cross-heap invalidation: pop_best_first() nulls entry->node via std::exchange; pop_diving() +// skips entries where entry->node == nullptr. +// +// Lock-free reads: best_first_queue_size(), diving_queue_size(), and get_lower_bound() read +// atomic shadow variables and do not acquire the mutex. template class node_queue_t { public: void push(mip_node_t* new_node) { - std::lock_guard lock(mutex); + std::lock_guard lock(mutex_); auto entry = std::make_shared(new_node); - best_first_heap.push(entry); - diving_heap.push(entry); + best_first_heap_.push(entry); + diving_heap_.push(entry); + lower_bound_ = best_first_heap_.top()->lower_bound; + ++diving_live_size_; } // This **MUST** only be called after acquiring the mutex with `lock()`. Remember to call // `unlock()` afterward. mip_node_t* pop_best_first() { - if (best_first_heap.empty()) { return nullptr; } - auto entry = best_first_heap.pop(); - return std::exchange(entry->node, nullptr); + if (best_first_heap_.empty()) { return nullptr; } + auto entry = best_first_heap_.pop(); + lower_bound_ = get_lower_bound(); + mip_node_t* node = std::exchange(entry->node, nullptr); + --diving_live_size_; + return node; } // This **MUST** only be called after acquiring the mutex with `lock()`. Remember to call // `unlock()` afterward. 
mip_node_t* pop_diving() { - while (!diving_heap.empty()) { - auto entry = diving_heap.pop(); - auto node_ptr = entry->node; - if (node_ptr != nullptr) { return node_ptr; } + while (!diving_heap_.empty()) { + auto entry = diving_heap_.pop(); + if (entry->node != nullptr) { + --diving_live_size_; + return entry->node; + } } - return nullptr; } - void lock() { mutex.lock(); } - void unlock() { mutex.unlock(); } + void lock() { mutex_.lock(); } + void unlock() { mutex_.unlock(); } - i_t diving_queue_size() - { - std::lock_guard lock(mutex); - return diving_heap.size(); - } + i_t diving_queue_size() { return diving_live_size_; } - i_t best_first_queue_size() - { - std::lock_guard lock(mutex); - return best_first_heap.size(); - } + i_t best_first_queue_size() { return best_first_heap_.size(); } f_t get_lower_bound() { - std::lock_guard lock(mutex); - return best_first_heap.empty() ? inf : best_first_heap.top()->lower_bound; + return best_first_heap_.empty() ? std::numeric_limits::infinity() : lower_bound_.load(); } private: struct heap_entry_t { mip_node_t* node = nullptr; - f_t lower_bound = -inf; - f_t score = inf; + f_t lower_bound = -std::numeric_limits::infinity(); + f_t score = std::numeric_limits::infinity(); heap_entry_t(mip_node_t* new_node) : node(new_node), lower_bound(new_node->lower_bound), score(new_node->objective_estimate) @@ -152,9 +169,12 @@ class node_queue_t { } }; - heap_t, lower_bound_comp> best_first_heap; - heap_t, score_comp> diving_heap; - omp_mutex_t mutex; + heap_t, lower_bound_comp> best_first_heap_; + heap_t, score_comp> diving_heap_; + omp_mutex_t mutex_; + + omp_atomic_t lower_bound_{std::numeric_limits::infinity()}; + omp_atomic_t diving_live_size_{0}; }; } // namespace cuopt::linear_programming::dual_simplex From 315aca6f915c1547af3bf66ff76746c547fa7cac Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 10 Apr 2026 16:37:36 +0200 Subject: [PATCH 18/53] replaced `std::deque` with a circular buffer. Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 23 ++-- cpp/src/branch_and_bound/worker_pool.hpp | 4 +- cpp/src/utilities/circular_deque.hpp | 112 ++++++++++++++++++ 3 files changed, 127 insertions(+), 12 deletions(-) create mode 100644 cpp/src/utilities/circular_deque.hpp diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 85f11c48bf..acca652d82 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -33,10 +34,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -1085,12 +1084,12 @@ struct deterministic_diving_policy_t : deterministic_policy_base_t> { using base = deterministic_policy_base_t>; - std::deque*>& stack; + circular_deque_t*>& stack; i_t max_backtrack_depth; deterministic_diving_policy_t(branch_and_bound_t& bnb, deterministic_diving_worker_t& worker, - std::deque*>& stack, + circular_deque_t*>& stack, i_t max_backtrack_depth) : base(bnb, worker), stack(stack), max_backtrack_depth(max_backtrack_depth) { @@ -1439,7 +1438,9 @@ template void branch_and_bound_t::plunge_with(bfs_worker_t* worker, mip_node_t* start_node) { - std::deque*> stack; + // Stack holds at most 2 entries: the preferred child + its sibling. + // The sibling is evicted to the queue before a new pair of children is added. 
+ circular_deque_t*> stack(4); stack.push_front(start_node); f_t lower_bound = get_lower_bound(); @@ -1618,7 +1619,7 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) worker->recompute_bounds = true; search_tree_t dive_tree(std::move(worker->start_node)); - std::deque*> stack; + circular_deque_t*> stack(2 * diving_backtrack_limit + 4); stack.push_front(&dive_tree.root); branch_and_bound_stats_t dive_stats; @@ -3813,11 +3814,6 @@ void branch_and_bound_t::deterministic_dive( { raft::common::nvtx::range scope("BB::deterministic_dive"); - // Create local search tree for the dive - search_tree_t dive_tree(std::move(entry.node)); - std::deque*> stack; - stack.push_front(&dive_tree.root); - worker.dive_lower = std::move(entry.resolved_lower); worker.dive_upper = std::move(entry.resolved_upper); @@ -3827,6 +3823,11 @@ void branch_and_bound_t::deterministic_dive( worker.lp_iters_this_dive = 0; worker.recompute_bounds_and_basis = true; + // Create local search tree for the dive + search_tree_t dive_tree(std::move(entry.node)); + circular_deque_t*> stack(2 * max_backtrack_depth + 4); + stack.push_front(&dive_tree.root); + while (!stack.empty() && deterministic_global_termination_status_ == mip_status_t::UNSET && nodes_this_dive < max_nodes_per_dive) { mip_node_t* node_ptr = stack.front(); diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index eabdc0beaa..e4de1af7b9 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -8,6 +8,7 @@ #pragma once #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -26,6 +27,7 @@ class worker_pool_t { { workers_.resize(num_workers); num_idle_workers_ = num_workers; + idle_workers_.clear_resize(num_workers); for (i_t i = 0; i < num_workers; ++i) { workers_[i] = std::make_unique(i, original_lp, Arow, var_type, settings, rng_offset); @@ -78,7 +80,7 @@ class worker_pool_t { bool is_initialized = false; omp_mutex_t 
mutex_; - std::deque idle_workers_; + circular_deque_t idle_workers_; omp_atomic_t num_idle_workers_; }; diff --git a/cpp/src/utilities/circular_deque.hpp b/cpp/src/utilities/circular_deque.hpp new file mode 100644 index 0000000000..3fa7756a75 --- /dev/null +++ b/cpp/src/utilities/circular_deque.hpp @@ -0,0 +1,112 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include + +namespace cuopt { + +// A fixed-capacity double-ended queue backed by a circular buffer. +// All operations are O(1) with no dynamic allocation after construction. +// +// Preconditions (asserted in debug builds): +// - push_front / push_back : size() < capacity() +// - pop_front / pop_back : !empty() +// - front / back : !empty() +template +class circular_deque_t { + public: + circular_deque_t() : buffer_(1), capacity_(1), head_(0), tail_(0) {} + + // Allocates storage for exactly `capacity` elements up front. 
+ explicit circular_deque_t(size_t capacity) + : buffer_(capacity + 1), // +1 to distinguish full (next(tail)==head) from empty (head==tail) + capacity_(capacity + 1), + head_(0), + tail_(0) + { + } + + bool empty() const { return head_ == tail_; } + bool full() const { return next(tail_) == head_; } + + size_t size() const { return (tail_ - head_ + capacity_) % capacity_; } + size_t capacity() const { return capacity_ - 1; } + + void clear_resize(size_t new_capacity) + { + head_ = 0; + tail_ = 0; + capacity_ = new_capacity + 1; + buffer_.resize(capacity_); + } + + void push_back(T val) + { + assert(!full()); + buffer_[tail_] = std::move(val); + tail_ = next(tail_); + } + + void push_front(T val) + { + assert(!full()); + head_ = prev(head_); + buffer_[head_] = std::move(val); + } + + T pop_front() + { + assert(!empty()); + T val = std::move(buffer_[head_]); + head_ = next(head_); + return val; + } + + T pop_back() + { + assert(!empty()); + tail_ = prev(tail_); + return std::move(buffer_[tail_]); + } + + T& front() + { + assert(!empty()); + return buffer_[head_]; + } + const T& front() const + { + assert(!empty()); + return buffer_[head_]; + } + + T& back() + { + assert(!empty()); + return buffer_[prev(tail_)]; + } + const T& back() const + { + assert(!empty()); + return buffer_[prev(tail_)]; + } + + private: + size_t next(size_t idx) const { return (idx + 1) % capacity_; } + size_t prev(size_t idx) const { return (idx + capacity_ - 1) % capacity_; } + + std::vector buffer_; + size_t capacity_; + size_t head_; + size_t tail_; +}; + +} // namespace cuopt From 31a6eabcdb1a11fd8fef36af50146af5606d2b5e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 15 Apr 2026 11:42:09 -0500 Subject: [PATCH 19/53] Inline upstream memory resource variable in test fixture MR composition --- cpp/tests/utilities/base_fixture.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 
c9c15ae04d..c4cd0e2575 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -36,8 +36,7 @@ inline auto make_pool() { // 1GB of initial pool size const size_t initial_pool_size = 1024 * 1024 * 1024; - auto upstream = make_async(); - return rmm::mr::pool_memory_resource(upstream, initial_pool_size); + return rmm::mr::pool_memory_resource(make_async(), initial_pool_size); } inline auto make_binning() From f889d28b10c3061ad8d52d5236fe1d2bc53ccd00 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 15 Apr 2026 15:24:48 -0500 Subject: [PATCH 20/53] Replace deprecated rmm::mr set_*_resource_ref calls with set_*_resource --- cpp/cuopt_cli.cpp | 2 +- cpp/tests/mip/load_balancing_test.cu | 2 +- cpp/tests/mip/multi_probe_test.cu | 2 +- cpp/tests/utilities/base_fixture.hpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index 0847a2fd11..5b3c765611 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -424,7 +424,7 @@ int main(int argc, char* argv[]) for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { RAFT_CUDA_TRY(cudaSetDevice(i)); memory_resources.emplace_back(); - rmm::mr::set_per_device_resource_ref(rmm::cuda_device_id{i}, memory_resources.back()); + rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back()); } RAFT_CUDA_TRY(cudaSetDevice(0)); } diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 909db285d8..f9ccbb4c93 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -119,7 +119,7 @@ bounds_probe_results(detail::bound_presolve_t& bnd_prb_0, void test_multi_probe(std::string path) { auto memory_resource = make_async(); - rmm::mr::set_current_device_resource_ref(memory_resource); + rmm::mr::set_current_device_resource(memory_resource); const raft::handle_t handle_{}; cuopt::mps_parser::mps_data_model_t mps_problem = 
cuopt::mps_parser::parse_mps(path, false); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index f32872b597..d72899b171 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -141,7 +141,7 @@ multi_probe_results( void test_multi_probe(std::string path) { auto memory_resource = make_async(); - rmm::mr::set_current_device_resource_ref(memory_resource); + rmm::mr::set_current_device_resource(memory_resource); const raft::handle_t handle_{}; cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index c4cd0e2575..31d7923dfa 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -119,6 +119,6 @@ inline auto parse_test_options(int argc, char** argv) auto const cmd_opts = parse_test_options(argc, argv); \ auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ auto resource = cuopt::test::create_memory_resource(rmm_mode); \ - rmm::mr::set_current_device_resource_ref(resource); \ + rmm::mr::set_current_device_resource(resource); \ return RUN_ALL_TESTS(); \ } From 3469026f2fae620931b4116cdc292bb0f7d6e4e9 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Thu, 16 Apr 2026 11:31:11 +0200 Subject: [PATCH 21/53] renamed method Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/pseudo_costs.hpp | 8 ++++---- cpp/src/utilities/omp_helpers.hpp | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 5db0d573a2..9635a6ca7f 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -255,10 +255,10 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { Base::pseudo_cost_sum_up.resize(n); for (i_t i = 0; i < n; ++i) { - Base::pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].get_no_atomic(); - Base::pseudo_cost_num_up[i] = other.pseudo_cost_num_up[i].get_no_atomic(); - Base::pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].get_no_atomic(); - Base::pseudo_cost_sum_up[i] = other.pseudo_cost_sum_up[i].get_no_atomic(); + Base::pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].underlying(); + Base::pseudo_cost_num_up[i] = other.pseudo_cost_num_up[i].underlying(); + Base::pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].underlying(); + Base::pseudo_cost_sum_up[i] = other.pseudo_cost_sum_up[i].underlying(); } return *this; diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index 8890a7487a..bbf4327f81 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -127,9 +127,8 @@ class omp_atomic_t { T fetch_sub(T inc) { return fetch_add(-inc); } // Get the underlying value without atomics - T& get_no_atomic() { return val; } - - T get_no_atomic() const { return val; } + T& underlying() { return val; } + T underlying() const { return val; } private: T val; From f3e863f1ab030899ca8b39539ab50dadcfa80a75 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Thu, 16 Apr 2026 16:08:17 +0200 Subject: [PATCH 22/53] fixed compilation Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 3c4568e327..35c2172d48 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1591,7 +1591,7 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t worker->integer_infeasible = start_node->integer_infeasible; worker->node_queue.unlock(); - if (get_cutoff() < start_node->lower_bound) { + if (upper_bound_.load() < start_node->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound search_tree_.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); @@ -1726,7 +1726,8 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* bfs_worker->node_queue.lock(); mip_node_t* start_node = bfs_worker->node_queue.pop_diving(); - if (!start_node || get_cutoff() < start_node->lower_bound || start_node->depth < min_node_depth) { + if (!start_node || upper_bound_.load() < start_node->lower_bound || + start_node->depth < min_node_depth) { diving_worker_pool_.return_worker_to_pool(diving_worker); bfs_worker->node_queue.unlock(); return false; From 56bf9ed9673ef4ffe4e43933964546a2957e8481 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 17 Apr 2026 10:11:25 +0200 Subject: [PATCH 23/53] fixed small bugs Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 36 +++++++++++++++---- cpp/src/branch_and_bound/node_queue.hpp | 13 ++----- cpp/src/branch_and_bound/pseudo_costs.hpp | 8 ++--- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 35c2172d48..445a15094e 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -258,7 +258,7 @@ branch_and_bound_t::branch_and_bound_t( pc_(1), solver_status_(mip_status_t::UNSET), rng_(settings_.random_seed ^ pcgenerator_t::default_seed, - settings_.random_seed ^ pcgenerator_t::default_stream ^ settings_.random_seed) + settings_.random_seed ^ pcgenerator_t::default_stream) { exploration_stats_.start_time = start_time; #ifdef PRINT_CONSTRAINT_MATRIX @@ -1124,7 +1124,12 @@ struct deterministic_diving_policy_t this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); case search_strategy_t::LINE_SEARCH_DIVING: - return line_search_diving(fractional, x, *this->worker.root_solution, log); + if (this->worker.root_solution) { + return line_search_diving(fractional, x, *this->worker.root_solution, log); + } else { + return pseudocost_diving( + this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); + } case search_strategy_t::GUIDED_DIVING: if (this->worker.incumbent_snapshot.empty()) { @@ -1817,16 +1822,25 @@ void branch_and_bound_t::run_scheduler() if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && time_since_last_log >= 1) || (time_since_last_log > 30) || now > settings_.time_limit) { - i_t node_depth = std::numeric_limits::max(); - i_t int_infeas = 0; + i_t node_depth = std::numeric_limits::max(); + i_t int_infeas = 0; + i_t bfs_node_queue_size = 0; + i_t diving_node_queue_size = 0; + for (int k = 0; k < num_bfs_workers; ++k) { bfs_worker_t* worker = bfs_worker_pool_.get_worker(k); if 
(worker->is_active) { node_depth = std::min(node_depth, worker->node_depth.load()); int_infeas = std::max(int_infeas, worker->integer_infeasible.load()); } + + bfs_node_queue_size += worker->node_queue.best_first_queue_size(); + diving_node_queue_size += worker->node_queue.diving_queue_size(); } + std::cout << std::format("bfs={}, diving={}", bfs_node_queue_size, diving_node_queue_size) + << std::endl; + report(' ', upper_bound_, lower_bound, node_depth, int_infeas); exploration_stats_.last_log = tic(); exploration_stats_.nodes_since_last_log = 0; @@ -1840,10 +1854,16 @@ void branch_and_bound_t::run_scheduler() for (i_t k = 0; k < num_bfs_workers && bfs_worker_pool_.num_idle_workers() > 0; ++k) { bfs_worker_t* worker = bfs_worker_pool_.get_worker(k); if (worker->is_active && worker->node_queue.best_first_queue_size() > 1) { - std::lock_guard lock(worker->node_queue); - mip_node_t* node = worker->node_queue.pop_best_first(); + mip_node_t* node = nullptr; + { + std::lock_guard lock(worker->node_queue); + node = worker->node_queue.pop_best_first(); + } if (!node) { continue; } - if (!launch_bfs_worker(node)) { break; } + if (!launch_bfs_worker(node)) { + worker->node_queue.push(node); + break; + } launched_any_task = true; } } @@ -1889,6 +1909,8 @@ void branch_and_bound_t::single_threaded_solve() bfs_worker_pool_.init(1, original_lp_, Arow_, var_types_, settings_); bfs_worker_t* worker = bfs_worker_pool_.get_worker(0); node_queue_t& node_queue = worker->node_queue; + node_queue.push(search_tree_.root.get_down_child()); + node_queue.push(search_tree_.root.get_up_child()); f_t lower_bound = get_lower_bound(); f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 28acf39f68..e169e868e9 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -76,16 +76,6 @@ class heap_t { }; // A queue storing the 
nodes waiting to be explored/dived from. -// -// Both heaps share ownership of heap_entry_t via shared_ptr. This keeps the entry alive even -// after the mip_node_t it points to has been freed, so pop_diving() can safely check -// entry->node without a dangling dereference. -// -// Cross-heap invalidation: pop_best_first() nulls entry->node via std::exchange; pop_diving() -// skips entries where entry->node == nullptr. -// -// Lock-free reads: best_first_queue_size(), diving_queue_size(), and get_lower_bound() read -// atomic shadow variables and do not acquire the mutex. template class node_queue_t { public: @@ -105,7 +95,8 @@ class node_queue_t { { if (best_first_heap_.empty()) { return nullptr; } auto entry = best_first_heap_.pop(); - lower_bound_ = get_lower_bound(); + lower_bound_ = best_first_heap_.empty() ? std::numeric_limits::infinity() + : best_first_heap_.top()->lower_bound; mip_node_t* node = std::exchange(entry->node, nullptr); --diving_live_size_; return node; diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 5db0d573a2..30f6e5d7e9 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -86,10 +86,10 @@ struct batch_pdlp_warm_cache_t { template struct pseudo_cost_averages_t { - f_t down_avg; - i_t num_init_down; - f_t up_avg; - i_t num_init_up; + f_t down_avg = 0; + i_t num_init_down = 0; + f_t up_avg = 0; + i_t num_init_up = 0; }; template From 18e1e831d7f0cabdc57eeacfd2077dc2d9665727 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 17 Apr 2026 13:25:23 +0200 Subject: [PATCH 24/53] added cleanup routine for the diving heap Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 12 ++------ cpp/src/branch_and_bound/node_queue.hpp | 30 ++++++++++++++++++- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 445a15094e..3eef03c877 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1822,10 +1822,8 @@ void branch_and_bound_t::run_scheduler() if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && time_since_last_log >= 1) || (time_since_last_log > 30) || now > settings_.time_limit) { - i_t node_depth = std::numeric_limits::max(); - i_t int_infeas = 0; - i_t bfs_node_queue_size = 0; - i_t diving_node_queue_size = 0; + i_t node_depth = std::numeric_limits::max(); + i_t int_infeas = 0; for (int k = 0; k < num_bfs_workers; ++k) { bfs_worker_t* worker = bfs_worker_pool_.get_worker(k); @@ -1833,14 +1831,8 @@ void branch_and_bound_t::run_scheduler() node_depth = std::min(node_depth, worker->node_depth.load()); int_infeas = std::max(int_infeas, worker->integer_infeasible.load()); } - - bfs_node_queue_size += worker->node_queue.best_first_queue_size(); - diving_node_queue_size += worker->node_queue.diving_queue_size(); } - std::cout << std::format("bfs={}, diving={}", bfs_node_queue_size, diving_node_queue_size) - << std::endl; - report(' ', upper_bound_, lower_bound, node_depth, int_infeas); exploration_stats_.last_log = tic(); exploration_stats_.nodes_since_last_log = 0; diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index e169e868e9..0c122613b9 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -69,6 +69,18 @@ class heap_t { // Read-only access to underlying buffer for iteration without modification const std::vector& data() const { return buffer; } + // Remove entries matching 
`should_remove` and rebuild the heap. + // Caller must hold any external lock before calling this. + template + void compact(Pred&& should_remove) + { + auto it = std::remove_if(buffer.begin(), buffer.end(), std::forward(should_remove)); + size_t removed = std::distance(it, buffer.end()); + buffer.erase(it, buffer.end()); + num_entries_ = buffer.size(); + if (removed > 0) { std::make_heap(buffer.begin(), buffer.end(), comp); } + } + private: std::vector buffer; omp_atomic_t num_entries_{0}; @@ -106,6 +118,8 @@ class node_queue_t { // `unlock()` afterward. mip_node_t* pop_diving() { + compact_diving_heap(); + while (!diving_heap_.empty()) { auto entry = diving_heap_.pop(); if (entry->node != nullptr) { @@ -120,7 +134,6 @@ class node_queue_t { void unlock() { mutex_.unlock(); } i_t diving_queue_size() { return diving_live_size_; } - i_t best_first_queue_size() { return best_first_heap_.size(); } f_t get_lower_bound() @@ -129,6 +142,21 @@ class node_queue_t { } private: + void compact_diving_heap() + { + // Allow a maximum of 1024 "dead" entries + constexpr i_t max_dead_entries = 1024; + + i_t heap_size = static_cast(diving_heap_.size()); + i_t live = diving_live_size_.load(); + if (heap_size <= live) { return; } + i_t dead = heap_size - live; + if (dead >= max_dead_entries) { + diving_heap_.compact( + [](const std::shared_ptr& e) { return e->node == nullptr; }); + } + } + struct heap_entry_t { mip_node_t* node = nullptr; f_t lower_bound = -std::numeric_limits::infinity(); From 17db46c3192e878f79d23a20c40c51a4a928fc1e Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 17 Apr 2026 17:14:05 +0200 Subject: [PATCH 25/53] bfs workers now can launch diving workers Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 108 +++++++++--------- cpp/src/branch_and_bound/branch_and_bound.hpp | 5 +- cpp/src/branch_and_bound/node_queue.hpp | 27 +---- 3 files changed, 55 insertions(+), 85 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 3eef03c877..5dde5e4322 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -211,6 +211,8 @@ std::string user_mip_gap(const lp_problem_t& lp, f_t obj_value, f_t lo } } +#define SHOW_DIVING_TYPE + #ifdef SHOW_DIVING_TYPE inline char feasible_solution_symbol(search_strategy_t strategy) { @@ -1578,9 +1580,40 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + i_t num_workers = bfs_worker_pool_.num_workers() + diving_worker_pool_.num_workers(); + diving_heuristics_settings_t diving_settings = settings_.diving_settings; + if (!has_solver_space_incumbent()) { diving_settings.guided_diving = false; } + std::vector search_strategies = get_search_strategies(diving_settings); + std::array max_num_workers = + get_max_workers(num_workers, search_strategies); + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && worker->node_queue.best_first_queue_size() > 0) { + // If the guided diving was disabled previously due to the lack of an incumbent solution, + // re-enable as soon as a new incumbent is found. 
+ if (settings_.diving_settings.guided_diving != diving_settings.guided_diving) { + if (has_solver_space_incumbent()) { + diving_settings.guided_diving = settings_.diving_settings.guided_diving; + search_strategies = get_search_strategies(diving_settings); + max_num_workers = get_max_workers(num_workers, search_strategies); + } + } + + if (diving_worker_pool_.num_idle_workers() > 0 && worker->node_queue.diving_queue_size() > 0) { + for (int i = 1; i < search_strategies.size(); ++i) { + auto strategy = search_strategies[i]; + + if (worker->node_queue.diving_queue_size() == 0 || + diving_worker_pool_.num_idle_workers() == 0) { + break; + } + + if (num_active_workers_[strategy] >= max_num_workers[strategy]) { continue; } + launch_diving_worker(worker, strategy); + } + } + worker->recompute_basis = true; worker->recompute_bounds = true; @@ -1612,7 +1645,7 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t } bfs_worker_pool_.return_worker_to_pool(worker); - active_workers_per_strategy_[BEST_FIRST]--; + num_active_workers_[BEST_FIRST]--; } template @@ -1701,7 +1734,7 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) } diving_worker_pool_.return_worker_to_pool(worker); - active_workers_per_strategy_[search_strategy]--; + num_active_workers_[search_strategy]--; } template @@ -1711,7 +1744,7 @@ bool branch_and_bound_t::launch_bfs_worker(mip_node_t* start if (!idle_worker) { return false; } idle_worker->init(start_node); - active_workers_per_strategy_[BEST_FIRST]++; + num_active_workers_[BEST_FIRST]++; #pragma omp task affinity(idle_worker) best_first_search_with(idle_worker); @@ -1721,8 +1754,7 @@ bool branch_and_bound_t::launch_bfs_worker(mip_node_t* start template bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* bfs_worker, - search_strategy_t diving_type, - i_t min_node_depth) + search_strategy_t diving_type) { // Get an idle worker. 
diving_worker_t* diving_worker = diving_worker_pool_.pop_idle_worker(); @@ -1732,7 +1764,7 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* mip_node_t* start_node = bfs_worker->node_queue.pop_diving(); if (!start_node || upper_bound_.load() < start_node->lower_bound || - start_node->depth < min_node_depth) { + start_node->depth < settings_.diving_settings.min_node_depth) { diving_worker_pool_.return_worker_to_pool(diving_worker); bfs_worker->node_queue.unlock(); return false; @@ -1747,7 +1779,7 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* return false; } - active_workers_per_strategy_[diving_type]++; + num_active_workers_[diving_type]++; #pragma omp task affinity(diving_worker) dive_with(diving_worker); @@ -1762,25 +1794,16 @@ void branch_and_bound_t::run_scheduler() const i_t num_workers = 2 * settings_.num_threads; if (!has_solver_space_incumbent()) { diving_settings.guided_diving = false; } - std::vector strategies = get_search_strategies(diving_settings); + std::vector search_strategies = get_search_strategies(diving_settings); std::array max_num_workers_per_type = - get_max_workers(num_workers, strategies); + get_max_workers(num_workers, search_strategies); const i_t num_bfs_workers = max_num_workers_per_type[BEST_FIRST]; const i_t num_diving_workers = num_workers - num_bfs_workers; bfs_worker_pool_.init(num_bfs_workers, original_lp_, Arow_, var_types_, settings_); diving_worker_pool_.init( num_diving_workers, original_lp_, Arow_, var_types_, settings_, num_bfs_workers); - active_workers_per_strategy_.fill(0); - -#ifdef CUOPT_LOG_DEBUG - for (auto strategy : strategies) { - settings_.log.debug("%c%d: max num of workers = %d", - feasible_solution_symbol(strategy), - strategy, - max_num_workers_per_type[strategy]); - } -#endif + num_active_workers_.fill(0); f_t lower_bound = get_lower_bound(); f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); @@ -1790,30 +1813,11 @@ void 
branch_and_bound_t::run_scheduler() launch_bfs_worker(search_tree_.root.get_down_child()); while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && - rel_gap > settings_.relative_mip_gap_tol && active_workers_per_strategy_[0] > 0) { + rel_gap > settings_.relative_mip_gap_tol && num_active_workers_[BEST_FIRST] > 0) { bool launched_any_task = false; repair_heuristic_solutions(); - // If the guided diving was disabled previously due to the lack of an incumbent solution, - // re-enable as soon as a new incumbent is found. - if (settings_.diving_settings.guided_diving != diving_settings.guided_diving) { - if (has_solver_space_incumbent()) { - diving_settings.guided_diving = settings_.diving_settings.guided_diving; - strategies = get_search_strategies(diving_settings); - max_num_workers_per_type = get_max_workers(num_workers, strategies); - -#ifdef CUOPT_LOG_DEBUG - for (auto type : strategies) { - settings_.log.debug("%c%d: max num of workers = %d", - feasible_solution_symbol(type), - type, - max_num_workers_per_type[type]); - } -#endif - } - } - f_t now = toc(exploration_stats_.start_time); f_t time_since_last_log = exploration_stats_.last_log == 0 ? 
1.0 : toc(exploration_stats_.last_log); @@ -1833,6 +1837,14 @@ void branch_and_bound_t::run_scheduler() } } + for (int i = 0; i < max_num_workers_per_type.size(); ++i) { + settings_.log.printf("%c%d: max num of workers = %d/%d", + feasible_solution_symbol(static_cast(i)), + i, + num_active_workers_[i], + max_num_workers_per_type[i]); + } + report(' ', upper_bound_, lower_bound, node_depth, int_infeas); exploration_stats_.last_log = tic(); exploration_stats_.nodes_since_last_log = 0; @@ -1860,24 +1872,6 @@ void branch_and_bound_t::run_scheduler() } } - for (int i = 1; i < strategies.size(); ++i) { - auto diving_type = strategies[i]; - i_t num_new_tasks = - max_num_workers_per_type[diving_type] - active_workers_per_strategy_[diving_type]; - - while (num_new_tasks > 0 && diving_worker_pool_.num_idle_workers() > 0) { - --num_new_tasks; - i_t k = rng_.uniform(0, num_bfs_workers); - bfs_worker_t* bfs_worker = bfs_worker_pool_.get_worker(k); - if (!bfs_worker->is_active) { continue; } - if (bfs_worker->node_queue.diving_queue_size() == 0) { continue; } - - if (launch_diving_worker(bfs_worker, diving_type, diving_settings.min_node_depth)) { - launched_any_task = true; - } - } - } - lower_bound = get_lower_bound(); abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 3d6773eeb9..612bb5026c 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -240,7 +240,7 @@ class branch_and_bound_t { // Count the number of workers per type that either are being executed or // are waiting to be executed. 
- std::array, num_search_strategies> active_workers_per_strategy_; + std::array, num_search_strategies> num_active_workers_; // Worker pool dedicated to the best-first search bfs_worker_pool_t bfs_worker_pool_; @@ -292,8 +292,7 @@ class branch_and_bound_t { bool launch_bfs_worker(mip_node_t* start_node); bool launch_diving_worker(bfs_worker_t* bfs_worker, - std::vector::value_type diving_type, - i_t min_node_depth); + std::vector::value_type diving_type); void best_first_search_with(bfs_worker_t* worker); diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 0c122613b9..2ae04847d2 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -98,7 +98,6 @@ class node_queue_t { best_first_heap_.push(entry); diving_heap_.push(entry); lower_bound_ = best_first_heap_.top()->lower_bound; - ++diving_live_size_; } // This **MUST** only be called after acquiring the mutex with `lock()`. Remember to call @@ -110,7 +109,6 @@ class node_queue_t { lower_bound_ = best_first_heap_.empty() ? std::numeric_limits::infinity() : best_first_heap_.top()->lower_bound; mip_node_t* node = std::exchange(entry->node, nullptr); - --diving_live_size_; return node; } @@ -118,14 +116,9 @@ class node_queue_t { // `unlock()` afterward. 
mip_node_t* pop_diving() { - compact_diving_heap(); - while (!diving_heap_.empty()) { auto entry = diving_heap_.pop(); - if (entry->node != nullptr) { - --diving_live_size_; - return entry->node; - } + if (entry->node != nullptr) { return entry->node; } } return nullptr; } @@ -133,7 +126,7 @@ class node_queue_t { void lock() { mutex_.lock(); } void unlock() { mutex_.unlock(); } - i_t diving_queue_size() { return diving_live_size_; } + i_t diving_queue_size() { return diving_heap_.size(); } i_t best_first_queue_size() { return best_first_heap_.size(); } f_t get_lower_bound() @@ -142,21 +135,6 @@ class node_queue_t { } private: - void compact_diving_heap() - { - // Allow a maximum of 1024 "dead" entries - constexpr i_t max_dead_entries = 1024; - - i_t heap_size = static_cast(diving_heap_.size()); - i_t live = diving_live_size_.load(); - if (heap_size <= live) { return; } - i_t dead = heap_size - live; - if (dead >= max_dead_entries) { - diving_heap_.compact( - [](const std::shared_ptr& e) { return e->node == nullptr; }); - } - } - struct heap_entry_t { mip_node_t* node = nullptr; f_t lower_bound = -std::numeric_limits::infinity(); @@ -193,7 +171,6 @@ class node_queue_t { omp_mutex_t mutex_; omp_atomic_t lower_bound_{std::numeric_limits::infinity()}; - omp_atomic_t diving_live_size_{0}; }; } // namespace cuopt::linear_programming::dual_simplex From 24d30362c1393da1275759e62df0d10ed11dec03 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Sun, 19 Apr 2026 11:51:51 -0500 Subject: [PATCH 26/53] Use RAFT_CUDA_TRY for cudaMemGetInfo. 
--- cpp/src/utilities/cuda_helpers.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index 2ed4c5a39b..eccf8e1538 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -241,7 +241,8 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size) inline size_t get_device_memory_size() { size_t free_mem, total_mem; - cudaMemGetInfo(&free_mem, &total_mem); + RAFT_CUDA_TRY(cudaMemGetInfo(&free_mem, &total_mem)); + // TODO (bdice): Restore limiting adaptor check after updating CCCL to support resource_cast return total_mem; } From bba777774ffe4d15e6400017f6d40031aa73e600 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Mon, 20 Apr 2026 16:08:25 +0200 Subject: [PATCH 27/53] removed scheduler thread. bfs workers are responsible for launching the diving workers. Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 346 ++++++------------ cpp/src/branch_and_bound/branch_and_bound.hpp | 16 +- cpp/src/branch_and_bound/constants.hpp | 4 + cpp/src/branch_and_bound/worker.hpp | 98 ++++- cpp/src/branch_and_bound/worker_pool.hpp | 42 +-- 5 files changed, 208 insertions(+), 298 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 5dde5e4322..adf2420f97 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -211,8 +211,6 @@ std::string user_mip_gap(const lp_problem_t& lp, f_t obj_value, f_t lo } } -#define SHOW_DIVING_TYPE - #ifdef SHOW_DIVING_TYPE inline char feasible_solution_symbol(search_strategy_t strategy) { @@ -1126,12 +1124,7 @@ struct deterministic_diving_policy_t this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); case search_strategy_t::LINE_SEARCH_DIVING: - if (this->worker.root_solution) { - return line_search_diving(fractional, x, 
*this->worker.root_solution, log); - } else { - return pseudocost_diving( - this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); - } + return line_search_diving(fractional, x, *this->worker.root_solution, log); case search_strategy_t::GUIDED_DIVING: if (this->worker.incumbent_snapshot.empty()) { @@ -1456,13 +1449,17 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, circular_deque_t*> stack(4); stack.push_front(start_node); - f_t lower_bound = get_lower_bound(); - f_t upper_bound = upper_bound_; - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); + f_t lower_bound = get_lower_bound(); + f_t upper_bound = upper_bound_; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); + worker->recompute_basis = true; + worker->recompute_bounds = true; while (stack.size() > 0 && (solver_status_ == mip_status_t::UNSET && is_running_) && rel_gap > settings_.relative_mip_gap_tol && abs_gap > settings_.absolute_mip_gap_tol) { + if (worker->is_main_worker) { repair_heuristic_solutions(); } + mip_node_t* node_ptr = stack.front(); stack.pop_front(); @@ -1472,9 +1469,7 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, // - The current node and its siblings uses the lower bound of the parent before solving the LP // relaxation // - The lower bound of the parent is lower or equal to its children - worker->lower_bound = node_ptr->lower_bound; - worker->node_depth = node_ptr->depth; - worker->integer_infeasible = node_ptr->integer_infeasible; + worker->lower_bound = node_ptr->lower_bound; if (node_ptr->lower_bound > upper_bound_.load()) { search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); @@ -1485,7 +1480,23 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, continue; } - if 
(toc(exploration_stats_.start_time) > settings_.time_limit) { + f_t now = toc(exploration_stats_.start_time); + + if (worker->is_main_worker) { + f_t time_since_last_log = + exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); + i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; + + if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + time_since_last_log >= 1) || + (time_since_last_log > 30) || now > settings_.time_limit) { + report(' ', upper_bound_, lower_bound, node_ptr->depth, node_ptr->integer_infeasible); + exploration_stats_.last_log = tic(); + exploration_stats_.nodes_since_last_log = 0; + } + } + + if (now > settings_.time_limit) { solver_status_ = mip_status_t::TIME_LIMIT; stack.push_front(node_ptr); break; @@ -1561,6 +1572,28 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, upper_bound = upper_bound_; rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); + + if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { + node_concurrent_halt_ = 1; + solver_status_ = mip_status_t::OPTIMAL; + break; + } + + // Launch a new diving task if any worker is idle + if (worker->total_active_diving_workers < worker->total_max_diving_workers && + worker->node_queue.diving_queue_size() > 0) { + launch_diving_worker(worker); + } + + // If any best-first worker become idle, + if (bfs_worker_pool_.num_idle_workers() > 0 && worker->node_queue.best_first_queue_size() > 0) { + worker->node_queue.lock(); + mip_node_t* node = worker->node_queue.pop_best_first(); + if (node != nullptr) { + if (!launch_bfs_worker(node)) { worker->node_queue.push(node); } + } + worker->node_queue.unlock(); + } } // If the solver was forced to stop, but we still have nodes to explore @@ -1573,6 +1606,21 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, } } +template 
+bfs_worker_t* branch_and_bound_t::launch_bfs_worker( + mip_node_t* start_node) +{ + bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); + if (!idle_worker) { return nullptr; } + + idle_worker->init(start_node); + +#pragma omp task affinity(idle_worker) + best_first_search_with(idle_worker); + + return idle_worker; +} + template void branch_and_bound_t::best_first_search_with(bfs_worker_t* worker) { @@ -1580,43 +1628,26 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - i_t num_workers = bfs_worker_pool_.num_workers() + diving_worker_pool_.num_workers(); - diving_heuristics_settings_t diving_settings = settings_.diving_settings; - if (!has_solver_space_incumbent()) { diving_settings.guided_diving = false; } - std::vector search_strategies = get_search_strategies(diving_settings); - std::array max_num_workers = - get_max_workers(num_workers, search_strategies); + worker->calculate_num_diving_workers(bfs_worker_pool_.num_workers(), + diving_worker_pool_.num_workers(), + has_solver_space_incumbent(), + settings_.diving_settings); while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && worker->node_queue.best_first_queue_size() > 0) { // If the guided diving was disabled previously due to the lack of an incumbent solution, // re-enable as soon as a new incumbent is found. 
- if (settings_.diving_settings.guided_diving != diving_settings.guided_diving) { + if (diving_worker_pool_.num_workers() > 0 && settings_.diving_settings.guided_diving != 0 && + worker->max_diving_workers[GUIDED_DIVING] == 0) { if (has_solver_space_incumbent()) { - diving_settings.guided_diving = settings_.diving_settings.guided_diving; - search_strategies = get_search_strategies(diving_settings); - max_num_workers = get_max_workers(num_workers, search_strategies); - } - } - - if (diving_worker_pool_.num_idle_workers() > 0 && worker->node_queue.diving_queue_size() > 0) { - for (int i = 1; i < search_strategies.size(); ++i) { - auto strategy = search_strategies[i]; - - if (worker->node_queue.diving_queue_size() == 0 || - diving_worker_pool_.num_idle_workers() == 0) { - break; - } - - if (num_active_workers_[strategy] >= max_num_workers[strategy]) { continue; } - launch_diving_worker(worker, strategy); + worker->calculate_num_diving_workers(bfs_worker_pool_.num_workers(), + diving_worker_pool_.num_workers(), + has_solver_space_incumbent(), + settings_.diving_settings); } } - worker->recompute_basis = true; - worker->recompute_bounds = true; - worker->node_queue.lock(); mip_node_t* start_node = worker->node_queue.pop_best_first(); if (!start_node) { @@ -1624,9 +1655,7 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t continue; } - worker->lower_bound = start_node->lower_bound; - worker->node_depth = start_node->depth; - worker->integer_infeasible = start_node->integer_infeasible; + worker->lower_bound = start_node->lower_bound; worker->node_queue.unlock(); if (upper_bound_.load() < start_node->lower_bound) { @@ -1644,8 +1673,8 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); } + worker->set_inactive(); bfs_worker_pool_.return_worker_to_pool(worker); - num_active_workers_[BEST_FIRST]--; } template @@ -1682,9 +1711,7 @@ void 
branch_and_bound_t::dive_with(diving_worker_t* worker) mip_node_t* node_ptr = stack.front(); stack.pop_front(); - worker->lower_bound = node_ptr->lower_bound; - worker->node_depth = node_ptr->depth; - worker->integer_infeasible = node_ptr->integer_infeasible; + worker->lower_bound = node_ptr->lower_bound; if (node_ptr->lower_bound > upper_bound_.load()) { worker->recompute_basis = true; @@ -1733,28 +1760,12 @@ void branch_and_bound_t::dive_with(diving_worker_t* worker) abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); } + worker->set_inactive(); diving_worker_pool_.return_worker_to_pool(worker); - num_active_workers_[search_strategy]--; -} - -template -bool branch_and_bound_t::launch_bfs_worker(mip_node_t* start_node) -{ - bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); - if (!idle_worker) { return false; } - - idle_worker->init(start_node); - num_active_workers_[BEST_FIRST]++; - -#pragma omp task affinity(idle_worker) - best_first_search_with(idle_worker); - - return true; } template -bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* bfs_worker, - search_strategy_t diving_type) +bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* bfs_worker) { // Get an idle worker. 
diving_worker_t* diving_worker = diving_worker_pool_.pop_idle_worker(); @@ -1770,7 +1781,7 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* return false; } - diving_worker->init(start_node, original_lp_, diving_type); + diving_worker->init(start_node, original_lp_); bfs_worker->node_queue.unlock(); bool is_feasible = diving_worker->presolve_start_bounds(settings_); @@ -1779,176 +1790,35 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* return false; } - num_active_workers_[diving_type]++; - -#pragma omp task affinity(diving_worker) - dive_with(diving_worker); - - return true; -} - -template -void branch_and_bound_t::run_scheduler() -{ - diving_heuristics_settings_t diving_settings = settings_.diving_settings; - const i_t num_workers = 2 * settings_.num_threads; + for (int i = 1; i < num_search_strategies; ++i) { + auto strategy = search_strategies[i]; - if (!has_solver_space_incumbent()) { diving_settings.guided_diving = false; } - std::vector search_strategies = get_search_strategies(diving_settings); - std::array max_num_workers_per_type = - get_max_workers(num_workers, search_strategies); + if (bfs_worker->active_diving_workers[strategy] < bfs_worker->max_diving_workers[strategy]) { + diving_worker->search_strategy = strategy; + diving_worker->bfs_worker = bfs_worker; + bfs_worker->active_diving_workers[strategy]++; + bfs_worker->total_active_diving_workers++; - const i_t num_bfs_workers = max_num_workers_per_type[BEST_FIRST]; - const i_t num_diving_workers = num_workers - num_bfs_workers; - bfs_worker_pool_.init(num_bfs_workers, original_lp_, Arow_, var_types_, settings_); - diving_worker_pool_.init( - num_diving_workers, original_lp_, Arow_, var_types_, settings_, num_bfs_workers); - num_active_workers_.fill(0); - - f_t lower_bound = get_lower_bound(); - f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - - 
launch_bfs_worker(search_tree_.root.get_up_child()); - launch_bfs_worker(search_tree_.root.get_down_child()); - - while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && - rel_gap > settings_.relative_mip_gap_tol && num_active_workers_[BEST_FIRST] > 0) { - bool launched_any_task = false; - - repair_heuristic_solutions(); - - f_t now = toc(exploration_stats_.start_time); - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); - i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; - - if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && - time_since_last_log >= 1) || - (time_since_last_log > 30) || now > settings_.time_limit) { - i_t node_depth = std::numeric_limits::max(); - i_t int_infeas = 0; - - for (int k = 0; k < num_bfs_workers; ++k) { - bfs_worker_t* worker = bfs_worker_pool_.get_worker(k); - if (worker->is_active) { - node_depth = std::min(node_depth, worker->node_depth.load()); - int_infeas = std::max(int_infeas, worker->integer_infeasible.load()); - } - } - - for (int i = 0; i < max_num_workers_per_type.size(); ++i) { - settings_.log.printf("%c%d: max num of workers = %d/%d", - feasible_solution_symbol(static_cast(i)), - i, - num_active_workers_[i], - max_num_workers_per_type[i]); - } - - report(' ', upper_bound_, lower_bound, node_depth, int_infeas); - exploration_stats_.last_log = tic(); - exploration_stats_.nodes_since_last_log = 0; - } - - if (now > settings_.time_limit) { - solver_status_ = mip_status_t::TIME_LIMIT; - break; - } - - for (i_t k = 0; k < num_bfs_workers && bfs_worker_pool_.num_idle_workers() > 0; ++k) { - bfs_worker_t* worker = bfs_worker_pool_.get_worker(k); - if (worker->is_active && worker->node_queue.best_first_queue_size() > 1) { - mip_node_t* node = nullptr; - { - std::lock_guard lock(worker->node_queue); - node = worker->node_queue.pop_best_first(); - } - if (!node) { continue; } - if 
(!launch_bfs_worker(node)) { - worker->node_queue.push(node); - break; - } - launched_any_task = true; - } - } - - lower_bound = get_lower_bound(); - abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); +#pragma omp task affinity(diving_worker) + dive_with(diving_worker); - if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { - node_concurrent_halt_ = 1; - solver_status_ = mip_status_t::OPTIMAL; - break; + return true; } - - // If no new task was launched in this iteration, suspend temporarily the - // execution of the scheduler. As of 8/Jan/2026, GCC does not - // implement taskyield, but LLVM does. - if (!launched_any_task) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } } + return false; } template void branch_and_bound_t::single_threaded_solve() { bfs_worker_pool_.init(1, original_lp_, Arow_, var_types_, settings_); - bfs_worker_t* worker = bfs_worker_pool_.get_worker(0); + bfs_worker_t* worker = bfs_worker_pool_[0]; + worker->is_main_worker = true; + node_queue_t& node_queue = worker->node_queue; node_queue.push(search_tree_.root.get_down_child()); node_queue.push(search_tree_.root.get_up_child()); - - f_t lower_bound = get_lower_bound(); - f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - - while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && - rel_gap > settings_.relative_mip_gap_tol && node_queue.best_first_queue_size() > 0) { - repair_heuristic_solutions(); - - f_t now = toc(exploration_stats_.start_time); - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 
1.0 : toc(exploration_stats_.last_log); - i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; - - if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && - time_since_last_log >= 1) || - (time_since_last_log > 30) || now > settings_.time_limit) { - i_t depth = worker->node_depth; - i_t int_infeas = worker->integer_infeasible; - report(' ', upper_bound_, lower_bound, depth, int_infeas); - exploration_stats_.last_log = tic(); - exploration_stats_.nodes_since_last_log = 0; - } - - if (now > settings_.time_limit) { - solver_status_ = mip_status_t::TIME_LIMIT; - break; - } - - // If there any node left in the heap, we pop the top node and explore it. - mip_node_t* start_node = node_queue.pop_best_first(); - - if (!start_node) { continue; } - if (upper_bound_.load() < start_node->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the - // current upper bound - search_tree_.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); - search_tree_.update(start_node, node_status_t::FATHOMED); - continue; - } - - plunge_with(worker, start_node); - - lower_bound = get_lower_bound(); - abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - - if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { - solver_status_ = mip_status_t::OPTIMAL; - break; - } - } + best_first_search_with(worker); } template @@ -1986,7 +1856,6 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( while (!root_crossover_solution_set_.load(std::memory_order_acquire) && *get_root_concurrent_halt() == 0) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); - continue; } if (root_crossover_solution_set_.load(std::memory_order_acquire)) { @@ -2709,10 +2578,21 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if 
(settings_.deterministic) { run_deterministic_coordinator(Arow_); } else if (settings_.num_threads > 1) { + const i_t num_workers = 2 * settings_.num_threads; + const i_t num_bfs_workers = std::max(settings_.num_threads / 2, 1); + const i_t num_diving_workers = num_workers - num_bfs_workers; + bfs_worker_pool_.init(num_bfs_workers, original_lp_, Arow_, var_types_, settings_); + diving_worker_pool_.init( + num_diving_workers, original_lp_, Arow_, var_types_, settings_, num_bfs_workers); + #pragma omp parallel num_threads(settings_.num_threads) { #pragma omp master - run_scheduler(); + { + auto worker = launch_bfs_worker(search_tree_.root.get_up_child()); + worker->is_main_worker = true; + launch_bfs_worker(search_tree_.root.get_down_child()); + } } } else { single_threaded_solve(); @@ -2729,7 +2609,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut lower_bound = std::numeric_limits::infinity(); for (int i = 0; i < bfs_worker_pool_.num_workers(); ++i) { - bfs_worker_t* worker = bfs_worker_pool_.get_worker(i); + bfs_worker_t* worker = bfs_worker_pool_[i]; // We need to clear the queue and use the info in the search tree for the lower bound while (worker->node_queue.best_first_queue_size() > 0) { @@ -2880,11 +2760,11 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri deterministic_horizon_step_ = 0.50; // Compute worker counts using the same formula as reliability-branching scheduler - const i_t num_workers = 2 * settings_.num_threads; - std::vector search_strategies = - get_search_strategies(settings_.diving_settings); - std::array max_num_workers = - get_max_workers(num_workers, search_strategies); + const i_t num_workers = 2 * settings_.num_threads; + std::vector search_strategies = {}; + // get_search_strategies(settings_.diving_settings); + std::array max_num_workers = {}; + // get_max_workers(num_workers, search_strategies); const int num_bfs_workers = max_num_workers[search_strategy_t::BEST_FIRST]; int num_diving_workers = 
0; diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 612bb5026c..993f7f5f05 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -238,10 +238,6 @@ class branch_and_bound_t { // Search tree search_tree_t search_tree_; - // Count the number of workers per type that either are being executed or - // are waiting to be executed. - std::array, num_search_strategies> num_active_workers_; - // Worker pool dedicated to the best-first search bfs_worker_pool_t bfs_worker_pool_; @@ -290,9 +286,8 @@ class branch_and_bound_t { // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); - bool launch_bfs_worker(mip_node_t* start_node); - bool launch_diving_worker(bfs_worker_t* bfs_worker, - std::vector::value_type diving_type); + bfs_worker_t* launch_bfs_worker(mip_node_t* start_node); + bool launch_diving_worker(bfs_worker_t* bfs_worker); void best_first_search_with(bfs_worker_t* worker); @@ -304,11 +299,6 @@ class branch_and_bound_t { // Perform a deep dive in the subtree determined by the `start_node` in order // to find integer feasible solutions. void dive_with(diving_worker_t* worker); - - // Run the scheduler whose will schedule and manage - // all the other workers. - void run_scheduler(); - // Run the branch-and-bound algorithm in single threaded mode. // This disable all diving heuristics. 
void single_threaded_solve(); @@ -445,6 +435,8 @@ class branch_and_bound_t { } }; heap_t diving_heap_; + + friend class branch_and_bound_worker_t; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/constants.hpp b/cpp/src/branch_and_bound/constants.hpp index ab8677095c..a84c4451b4 100644 --- a/cpp/src/branch_and_bound/constants.hpp +++ b/cpp/src/branch_and_bound/constants.hpp @@ -6,6 +6,7 @@ /* clang-format on */ #pragma once +#include namespace cuopt::linear_programming::dual_simplex { @@ -24,6 +25,9 @@ enum search_strategy_t : int { COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) }; +constexpr std::array search_strategies = { + BEST_FIRST, PSEUDOCOST_DIVING, LINE_SEARCH_DIVING, GUIDED_DIVING, COEFFICIENT_DIVING}; + enum class rounding_direction_t { NONE = -1, DOWN = 0, UP = 1 }; enum class branch_and_bound_mode_t { PARALLEL = 0, DETERMINISTIC = 1 }; diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index acded68f9d..bd2144a5aa 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -9,17 +9,15 @@ #include #include +#include #include #include #include -#include #include -#include "node_queue.hpp" - namespace cuopt::linear_programming::dual_simplex { template @@ -33,6 +31,21 @@ struct branch_and_bound_stats_t { omp_atomic_t last_log = 0.0; }; +template +bool is_search_strategy_enabled(search_strategy_t strategy, + bool has_incumbent, + diving_heuristics_settings_t settings) +{ + switch (strategy) { + case BEST_FIRST: return true; + case PSEUDOCOST_DIVING: return settings.pseudocost_diving != 0; + case LINE_SEARCH_DIVING: return settings.line_search_diving != 0; + case GUIDED_DIVING: return settings.guided_diving != 0 && has_incumbent; + case COEFFICIENT_DIVING: return settings.coefficient_diving != 0; + default: return false; + } +} + template class branch_and_bound_worker_t { public: @@ -43,8 +56,6 @@ class branch_and_bound_worker_t { 
omp_atomic_t search_strategy; omp_atomic_t is_active; omp_atomic_t lower_bound; - omp_atomic_t node_depth; - omp_atomic_t integer_infeasible; lp_problem_t leaf_problem; lp_solution_t leaf_solution; @@ -125,6 +136,10 @@ class bfs_worker_t : public branch_and_bound_worker_t { Base::start_lower = original_lp.lower; Base::start_upper = original_lp.upper; Base::search_strategy = BEST_FIRST; + + max_diving_workers.fill(0); + active_diving_workers.fill(0); + total_active_diving_workers = 0; } f_t get_lower_bound() @@ -145,7 +160,54 @@ class bfs_worker_t : public branch_and_bound_worker_t { Base::is_active = true; } + void set_inactive() { Base::is_active = false; } + + void calculate_num_diving_workers(i_t num_bfs_workers, + i_t total_diving_workers, + bool has_incumbent, + const diving_heuristics_settings_t& settings) + { + i_t num_active = 0; + for (i_t i = 1; i < num_search_strategies; ++i) { + num_active += is_search_strategy_enabled(search_strategies[i], has_incumbent, settings); + } + + total_max_diving_workers = 0; + for (size_t i = 1, k = 0; i < num_search_strategies; ++i) { + // Calculate the number of workers for a given diving heuristic + i_t start = std::floor((double)k * total_diving_workers / num_active); + i_t end = std::floor((double)(k + 1) * total_diving_workers / num_active); + i_t workers_per_type = end - start; + + // Calculate the number of diving workers allocated to this (best-first) worker + start = std::floor((double)Base::worker_id * workers_per_type / num_bfs_workers); + end = std::floor((double)(Base::worker_id + 1) * workers_per_type / num_bfs_workers); + max_diving_workers[i] = end - start; + total_max_diving_workers += max_diving_workers[i]; + ++k; + } + } + + // Flag to indicate if this worker is responsible for reporting, checking the convergence + // and repairing the heuristic solutions. + bool is_main_worker = false; + + // The worker-local node heap. 
node_queue_t node_queue; + + // The number of diving workers of each type that this (best-first) worker can launch. + std::array max_diving_workers; + + // The number of active diving workers of each type associated with this (best-first) worker. + std::array, num_search_strategies> active_diving_workers; + + // Keep track of the total number of active diving worker that are associated with this + // (best-first) worker + omp_atomic_t total_active_diving_workers; + + // The maximum number of diving worker that are associated with this + // (best-first) worker + i_t total_max_diving_workers; }; template @@ -154,16 +216,13 @@ class diving_worker_t : public branch_and_bound_worker_t { using Base = branch_and_bound_worker_t; using Base::Base; - void init(const mip_node_t* node, - const lp_problem_t& original_lp, - search_strategy_t strategy) + void init(const mip_node_t* node, const lp_problem_t& original_lp) { - start_node = node->detach_copy(); - Base::start_lower = original_lp.lower; - Base::start_upper = original_lp.upper; - Base::search_strategy = strategy; - Base::lower_bound = node->lower_bound; - Base::is_active = true; + start_node = node->detach_copy(); + Base::start_lower = original_lp.lower; + Base::start_upper = original_lp.upper; + Base::lower_bound = node->lower_bound; + Base::is_active = true; std::fill(Base::bounds_changed.begin(), Base::bounds_changed.end(), false); node->get_variable_bounds(Base::start_lower, Base::start_upper, Base::bounds_changed); } @@ -179,7 +238,18 @@ class diving_worker_t : public branch_and_bound_worker_t { return Base::is_active ? Base::lower_bound.load() : std::numeric_limits::infinity(); } + void set_inactive() + { + Base::is_active = false; + --bfs_worker->total_active_diving_workers; + --bfs_worker->active_diving_workers[Base::search_strategy]; + } + mip_node_t start_node; + + // The best-first worker that is associated with this diving worker. Used for controlling the + // number of active diving workers. 
+ bfs_worker_t* bfs_worker; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index e4de1af7b9..3f5147279b 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -49,9 +49,9 @@ class worker_pool_t { return workers_[idx].get(); } } + void return_worker_to_pool(WorkerType* worker) { - worker->is_active = false; std::lock_guard lock(mutex_); idle_workers_.push_back(worker->worker_id); num_idle_workers_++; @@ -70,7 +70,8 @@ class worker_pool_t { return lower_bound; } - WorkerType* get_worker(i_t id) { return workers_[id].get(); } + WorkerType* operator[](i_t id) { return workers_[id].get(); } + WorkerType* operator[](i_t id) const { return workers_[id].get(); } i_t num_idle_workers() const { return num_idle_workers_; } i_t num_workers() const { return workers_.size(); } @@ -84,43 +85,6 @@ class worker_pool_t { omp_atomic_t num_idle_workers_; }; -template -std::vector get_search_strategies( - diving_heuristics_settings_t settings) -{ - std::vector types; - types.reserve(num_search_strategies); - types.push_back(BEST_FIRST); - if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); } - if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); } - if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); } - if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); } - return types; -} - -template -std::array get_max_workers( - i_t num_workers, const std::vector& strategies) -{ - std::array max_num_workers; - max_num_workers.fill(0); - - i_t bfs_workers = std::max(strategies.size() == 1 ? 
num_workers : num_workers / 4, 1); - max_num_workers[BEST_FIRST] = bfs_workers; - - i_t diving_workers = (num_workers - bfs_workers); - i_t m = strategies.size() - 1; - - for (size_t i = 1, k = 0; i < strategies.size(); ++i) { - i_t start = (double)k * diving_workers / m; - i_t end = (double)(k + 1) * diving_workers / m; - max_num_workers[strategies[i]] = end - start; - ++k; - } - - return max_num_workers; -} - template using bfs_worker_pool_t = worker_pool_t>; From e6b19f33e22c1221b0ae236529b007e9f990941f Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Mon, 20 Apr 2026 16:21:29 +0200 Subject: [PATCH 28/53] fixed deterministic code path Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index adf2420f97..caf0b8dcf8 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -2760,17 +2760,9 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri deterministic_horizon_step_ = 0.50; // Compute worker counts using the same formula as reliability-branching scheduler - const i_t num_workers = 2 * settings_.num_threads; - std::vector search_strategies = {}; - // get_search_strategies(settings_.diving_settings); - std::array max_num_workers = {}; - // get_max_workers(num_workers, search_strategies); - - const int num_bfs_workers = max_num_workers[search_strategy_t::BEST_FIRST]; - int num_diving_workers = 0; - for (size_t i = 1; i < search_strategies.size(); ++i) { - num_diving_workers += max_num_workers[search_strategies[i]]; - } + const i_t num_workers = 2 * settings_.num_threads; + const i_t num_bfs_workers = std::max(num_workers / 4, 1); + const i_t num_diving_workers = num_workers - num_bfs_workers; deterministic_mode_enabled_ = true; deterministic_current_horizon_ = 
deterministic_horizon_step_; From 6135616d85c7e6446b723b9eb1037678f3888045 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Mon, 20 Apr 2026 17:57:25 +0200 Subject: [PATCH 29/53] set the worker 0 to be the main one. node_queue must be manually lock and unlock (push and pop semantics now match) Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 35 ++++++++++------- cpp/src/branch_and_bound/node_queue.hpp | 9 ++--- cpp/src/branch_and_bound/worker.hpp | 39 ++++++++++--------- cpp/src/branch_and_bound/worker_pool.hpp | 2 +- 4 files changed, 46 insertions(+), 39 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index caf0b8dcf8..e25acebfcc 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1458,7 +1458,7 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, while (stack.size() > 0 && (solver_status_ == mip_status_t::UNSET && is_running_) && rel_gap > settings_.relative_mip_gap_tol && abs_gap > settings_.absolute_mip_gap_tol) { - if (worker->is_main_worker) { repair_heuristic_solutions(); } + if (worker->worker_id == 0) { repair_heuristic_solutions(); } mip_node_t* node_ptr = stack.front(); stack.pop_front(); @@ -1482,7 +1482,7 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, f_t now = toc(exploration_stats_.start_time); - if (worker->is_main_worker) { + if (worker->worker_id == 0) { f_t time_since_last_log = exploration_stats_.last_log == 0 ? 
1.0 : toc(exploration_stats_.last_log); i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; @@ -1544,14 +1544,18 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, if (stack.size() > 0) { mip_node_t* node = stack.back(); stack.pop_back(); + worker->node_queue.lock(); worker->node_queue.push(node); + worker->node_queue.unlock(); } exploration_stats_.nodes_unexplored += 2; if (round_dir == rounding_direction_t::UP) { if (worker->node_queue.best_first_queue_size() < min_node_queue_size_) { + worker->node_queue.lock(); worker->node_queue.push(node_ptr->get_down_child()); + worker->node_queue.unlock(); } else { stack.push_front(node_ptr->get_down_child()); } @@ -1559,7 +1563,9 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, stack.push_front(node_ptr->get_up_child()); } else { if (worker->node_queue.best_first_queue_size() < min_node_queue_size_) { + worker->node_queue.lock(); worker->node_queue.push(node_ptr->get_up_child()); + worker->node_queue.unlock(); } else { stack.push_front(node_ptr->get_up_child()); } @@ -1573,12 +1579,6 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); - if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { - node_concurrent_halt_ = 1; - solver_status_ = mip_status_t::OPTIMAL; - break; - } - // Launch a new diving task if any worker is idle if (worker->total_active_diving_workers < worker->total_max_diving_workers && worker->node_queue.diving_queue_size() > 0) { @@ -1602,7 +1602,9 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, while (!stack.empty()) { auto node = stack.front(); stack.pop_front(); + worker->node_queue.lock(); worker->node_queue.push(node); + worker->node_queue.unlock(); } } @@ -1615,7 +1617,7 @@ bfs_worker_t* branch_and_bound_t::launch_bfs_worker( idle_worker->init(start_node); 
-#pragma omp task affinity(idle_worker) +#pragma omp task affinity(*idle_worker) priority(99) best_first_search_with(idle_worker); return idle_worker; @@ -1671,6 +1673,12 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t lower_bound = get_lower_bound(); abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { + node_concurrent_halt_ = 1; + solver_status_ = mip_status_t::OPTIMAL; + break; + } } worker->set_inactive(); @@ -1799,12 +1807,14 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* bfs_worker->active_diving_workers[strategy]++; bfs_worker->total_active_diving_workers++; -#pragma omp task affinity(diving_worker) +#pragma omp task affinity(*diving_worker) dive_with(diving_worker); return true; } } + + diving_worker_pool_.return_worker_to_pool(diving_worker); return false; } @@ -1813,7 +1823,6 @@ void branch_and_bound_t::single_threaded_solve() { bfs_worker_pool_.init(1, original_lp_, Arow_, var_types_, settings_); bfs_worker_t* worker = bfs_worker_pool_[0]; - worker->is_main_worker = true; node_queue_t& node_queue = worker->node_queue; node_queue.push(search_tree_.root.get_down_child()); @@ -2589,8 +2598,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut { #pragma omp master { - auto worker = launch_bfs_worker(search_tree_.root.get_up_child()); - worker->is_main_worker = true; + auto worker = launch_bfs_worker(search_tree_.root.get_up_child()); + std::cout << std::format("Worker {} is the main one", worker->worker_id) << std::endl; launch_bfs_worker(search_tree_.root.get_down_child()); } } diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 2ae04847d2..10b5c57701 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -87,21 +87,20 
@@ class heap_t { Comp comp; }; -// A queue storing the nodes waiting to be explored/dived from. +// A queue storing the nodes waiting to be explored. Before calling pop or push in parallel, +// the mutex NEEDS to be acquired via the `lock()` method. It must be released afterwards with +// `unlock()`. template class node_queue_t { public: void push(mip_node_t* new_node) { - std::lock_guard lock(mutex_); auto entry = std::make_shared(new_node); best_first_heap_.push(entry); diving_heap_.push(entry); lower_bound_ = best_first_heap_.top()->lower_bound; } - // This **MUST** only be called after acquiring the mutex with `lock()`. Remember to call - // `unlock()` afterward. mip_node_t* pop_best_first() { if (best_first_heap_.empty()) { return nullptr; } @@ -112,8 +111,6 @@ class node_queue_t { return node; } - // This **MUST** only be called after acquiring the mutex with `lock()`. Remember to call - // `unlock()` afterward. mip_node_t* pop_diving() { while (!diving_heap_.empty()) { diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index bd2144a5aa..203ce945d2 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -173,25 +173,26 @@ class bfs_worker_t : public branch_and_bound_worker_t { } total_max_diving_workers = 0; + max_diving_workers.fill(0); + if (num_active == 0) { return; } + for (size_t i = 1, k = 0; i < num_search_strategies; ++i) { - // Calculate the number of workers for a given diving heuristic - i_t start = std::floor((double)k * total_diving_workers / num_active); - i_t end = std::floor((double)(k + 1) * total_diving_workers / num_active); - i_t workers_per_type = end - start; - - // Calculate the number of diving workers allocated to this (best-first) worker - start = std::floor((double)Base::worker_id * workers_per_type / num_bfs_workers); - end = std::floor((double)(Base::worker_id + 1) * workers_per_type / num_bfs_workers); - max_diving_workers[i] = end - start; - 
total_max_diving_workers += max_diving_workers[i]; - ++k; + if (is_search_strategy_enabled(search_strategies[i], has_incumbent, settings)) { + // Calculate the number of workers for a given diving heuristic + i_t start = std::floor((double)k * total_diving_workers / num_active); + i_t end = std::floor((double)(k + 1) * total_diving_workers / num_active); + i_t workers_per_type = end - start; + + // Calculate the number of diving workers allocated to this (best-first) worker + start = std::floor((double)Base::worker_id * workers_per_type / num_bfs_workers); + end = std::floor((double)(Base::worker_id + 1) * workers_per_type / num_bfs_workers); + max_diving_workers[i] = end - start; + total_max_diving_workers += max_diving_workers[i]; + ++k; + } } } - // Flag to indicate if this worker is responsible for reporting, checking the convergence - // and repairing the heuristic solutions. - bool is_main_worker = false; - // The worker-local node heap. node_queue_t node_queue; @@ -203,11 +204,11 @@ class bfs_worker_t : public branch_and_bound_worker_t { // Keep track of the total number of active diving worker that are associated with this // (best-first) worker - omp_atomic_t total_active_diving_workers; + omp_atomic_t total_active_diving_workers{0}; // The maximum number of diving worker that are associated with this // (best-first) worker - i_t total_max_diving_workers; + i_t total_max_diving_workers{0}; }; template @@ -241,15 +242,15 @@ class diving_worker_t : public branch_and_bound_worker_t { void set_inactive() { Base::is_active = false; - --bfs_worker->total_active_diving_workers; --bfs_worker->active_diving_workers[Base::search_strategy]; + --bfs_worker->total_active_diving_workers; } mip_node_t start_node; // The best-first worker that is associated with this diving worker. Used for controlling the // number of active diving workers. 
- bfs_worker_t* bfs_worker; + bfs_worker_t* bfs_worker{nullptr}; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index 3f5147279b..a4551de70c 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -31,7 +31,7 @@ class worker_pool_t { for (i_t i = 0; i < num_workers; ++i) { workers_[i] = std::make_unique(i, original_lp, Arow, var_type, settings, rng_offset); - idle_workers_.push_front(i); + idle_workers_.push_back(i); } is_initialized = true; From 70b75491ed80c17faabf77c0107040d8e051620e Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 21 Apr 2026 13:42:43 +0200 Subject: [PATCH 30/53] small fixes Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 10 ++++------ cpp/src/branch_and_bound/branch_and_bound.hpp | 2 -- cpp/src/branch_and_bound/diving_heuristics.cpp | 2 +- cpp/src/branch_and_bound/worker_pool.hpp | 2 +- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index e25acebfcc..fb3f6e2227 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -256,9 +256,7 @@ branch_and_bound_t::branch_and_bound_t( root_relax_soln_(1, 1), root_crossover_soln_(1, 1), pc_(1), - solver_status_(mip_status_t::UNSET), - rng_(settings_.random_seed ^ pcgenerator_t::default_seed, - settings_.random_seed ^ pcgenerator_t::default_stream) + solver_status_(mip_status_t::UNSET) { exploration_stats_.start_time = start_time; #ifdef PRINT_CONSTRAINT_MATRIX @@ -1615,6 +1613,7 @@ bfs_worker_t* branch_and_bound_t::launch_bfs_worker( bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); if (!idle_worker) { return nullptr; } + assert(start_node != nullptr); idle_worker->init(start_node); #pragma omp task affinity(*idle_worker) 
priority(99) @@ -1784,8 +1783,8 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* if (!start_node || upper_bound_.load() < start_node->lower_bound || start_node->depth < settings_.diving_settings.min_node_depth) { - diving_worker_pool_.return_worker_to_pool(diving_worker); bfs_worker->node_queue.unlock(); + diving_worker_pool_.return_worker_to_pool(diving_worker); return false; } @@ -2598,8 +2597,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut { #pragma omp master { - auto worker = launch_bfs_worker(search_tree_.root.get_up_child()); - std::cout << std::format("Worker {} is the main one", worker->worker_id) << std::endl; + launch_bfs_worker(search_tree_.root.get_up_child()); launch_bfs_worker(search_tree_.root.get_down_child()); } } diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 993f7f5f05..52cd96525d 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -244,8 +244,6 @@ class branch_and_bound_t { // Worker pool dedicated to diving diving_worker_pool_t diving_worker_pool_; - pcgenerator_t rng_; - // Global status of the solver. omp_atomic_t solver_status_; omp_atomic_t is_running_{false}; diff --git a/cpp/src/branch_and_bound/diving_heuristics.cpp b/cpp/src/branch_and_bound/diving_heuristics.cpp index 571027c1d7..abe6b0dff8 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.cpp +++ b/cpp/src/branch_and_bound/diving_heuristics.cpp @@ -97,7 +97,7 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, f_t score = 0; rounding_direction_t dir = rounding_direction_t::DOWN; - f_t root_val = (j < static_cast(root_solution.size())) ? 
root_solution[j] : solution[j]; + f_t root_val = root_solution[j]; if (solution[j] < root_val - f_t(0.4)) { score = score_down; diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index a4551de70c..e8b3216b5b 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -82,7 +82,7 @@ class worker_pool_t { omp_mutex_t mutex_; circular_deque_t idle_workers_; - omp_atomic_t num_idle_workers_; + omp_atomic_t num_idle_workers_{0}; }; template From e6c2bc56befdb6da3ca9b4c9666fa09280e03e76 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 21 Apr 2026 13:59:01 +0200 Subject: [PATCH 31/53] added a flag to conditionally build the remote execution (gRPC). removed DISABLE_OPENMP flag since it is always needed by the MIP solver. Signed-off-by: Nicolas L. Guidotti --- build.sh | 9 +- cpp/CMakeLists.txt | 1075 ++++++++++++++++--------------- cpp/src/mip_heuristics/solve.cu | 2 + cpp/src/pdlp/solve.cu | 2 + 4 files changed, 553 insertions(+), 535 deletions(-) diff --git a/build.sh b/build.sh index 5f9ac4071a..d07faea237 100755 --- a/build.sh +++ b/build.sh @@ -15,7 +15,7 @@ REPODIR=$(cd "$(dirname "$0")"; pwd) LIBCUOPT_BUILD_DIR=${LIBCUOPT_BUILD_DIR:=${REPODIR}/cpp/build} LIBMPS_PARSER_BUILD_DIR=${LIBMPS_PARSER_BUILD_DIR:=${REPODIR}/cpp/libmps_parser/build} -VALIDARGS="clean libcuopt cuopt_grpc_server libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" +VALIDARGS="clean libcuopt cuopt_grpc_server libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids 
--skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-grpc-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -44,6 +44,7 @@ HELP="$0 [ ...] [ ...] --skip-c-python-adapters - skip building C and Python adapter files (cython_solve.cu and cuopt_c.cpp) --skip-tests-build - disable building of all tests --skip-routing-build - skip building routing components + --skip-grpc-build - skip building gRPC and protobuf components (auto-enabled with -tsan) --skip-fatbin-write - skip the fatbin write --host-lineinfo - build with debug line information for host code --cache-tool= - pass the build cache tool (eg: ccache, sccache, distcc) that will be used @@ -84,6 +85,7 @@ BUILD_MSAN=0 SKIP_C_PYTHON_ADAPTERS=0 SKIP_TESTS_BUILD=0 SKIP_ROUTING_BUILD=0 +SKIP_GRPC_BUILD=0 WRITE_FATBIN=1 HOST_LINEINFO=0 CACHE_ARGS=() @@ -238,6 +240,7 @@ if hasArg -fsanitize; then fi if hasArg -tsan; then BUILD_TSAN=1 + SKIP_GRPC_BUILD=1 fi if hasArg -msan; then BUILD_MSAN=1 @@ -251,6 +254,9 @@ fi if hasArg --skip-routing-build; then SKIP_ROUTING_BUILD=1 fi +if hasArg --skip-grpc-build; then + SKIP_GRPC_BUILD=1 +fi if hasArg --skip-fatbin-write; then WRITE_FATBIN=0 fi @@ -379,6 +385,7 @@ if buildAll || hasArg libcuopt || hasArg cuopt_grpc_server; then -DSKIP_C_PYTHON_ADAPTERS=${SKIP_C_PYTHON_ADAPTERS} \ -DBUILD_TESTS=$((1 - ${SKIP_TESTS_BUILD})) \ -DSKIP_ROUTING_BUILD=${SKIP_ROUTING_BUILD} \ + -DSKIP_GRPC_BUILD=${SKIP_GRPC_BUILD} \ -DWRITE_FATBIN=${WRITE_FATBIN} \ -DHOST_LINEINFO=${HOST_LINEINFO} \ -DPARALLEL_LEVEL="${PARALLEL_LEVEL}" \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index abc6a19ab2..3b4e085e8b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -24,9 +24,9 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") message(STATUS 
"CMAKE_MODULE_PATH = ${CMAKE_MODULE_PATH}") project( - CUOPT - VERSION "${RAPIDS_VERSION}" - LANGUAGES CXX CUDA C + CUOPT + VERSION "${RAPIDS_VERSION}" + LANGUAGES CXX CUDA C ) # Disable C++20 module scanning as the codebase doesn't use modules @@ -43,10 +43,10 @@ rapids_cmake_build_type(Release) # - User Options ------------------------------------------------------------ option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc useful for cuda-memcheck / profiler" ON) option(BUILD_TESTS "Configure CMake to build tests" ON) -option(DISABLE_OPENMP "Disable OpenMP" OFF) option(BUILD_LP_ONLY "Build only linear programming components, exclude routing and MIP-specific files" OFF) option(SKIP_C_PYTHON_ADAPTERS "Skip building C and Python adapter files (cython_solve.cu and cuopt_c.cpp)" OFF) option(SKIP_ROUTING_BUILD "Skip building routing components" OFF) +option(SKIP_GRPC_BUILD "Skip building gRPC and protobuf components" OFF) option(WRITE_FATBIN "Enable fatbin writing" ON) option(HOST_LINEINFO "Build with debug line information for host code" OFF) @@ -67,69 +67,69 @@ message(VERBOSE "cuOpt: fatbin: ${WRITE_FATBIN}") rapids_cuda_init_runtime(USE_STATIC ON) rapids_find_package(CUDAToolkit REQUIRED - BUILD_EXPORT_SET cuopt-exports - INSTALL_EXPORT_SET cuopt-exports + BUILD_EXPORT_SET cuopt-exports + INSTALL_EXPORT_SET cuopt-exports ) set(CUOPT_CXX_FLAGS "") set(CUOPT_CUDA_FLAGS "") -if(CMAKE_COMPILER_IS_GNUCXX) - list(APPEND CUOPT_CXX_FLAGS -Werror -Wno-error=deprecated-declarations) -endif(CMAKE_COMPILER_IS_GNUCXX) +if (CMAKE_COMPILER_IS_GNUCXX) + list(APPEND CUOPT_CXX_FLAGS -Werror -Wno-error=deprecated-declarations) +endif (CMAKE_COMPILER_IS_GNUCXX) # Papilo pulls in Boost.Multiprecision float128 support, which expects quadmath.h from the GCC # toolchain internals. Conda clang ships libquadmath, but does not surface the matching GCC # internal include directory by default. 
Add it late in the search order so clang still prefers its # own builtin intrinsic headers. -if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - execute_process( - COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libquadmath.a - OUTPUT_VARIABLE CUOPT_QUADMATH_LIB - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - if(IS_ABSOLUTE "${CUOPT_QUADMATH_LIB}") - get_filename_component(CUOPT_QUADMATH_LIBDIR "${CUOPT_QUADMATH_LIB}" DIRECTORY) - set(CUOPT_QUADMATH_INCLUDEDIR "${CUOPT_QUADMATH_LIBDIR}/include") - - if(EXISTS "${CUOPT_QUADMATH_INCLUDEDIR}/quadmath.h") - message(STATUS "Adding clang fallback include for quadmath: ${CUOPT_QUADMATH_INCLUDEDIR}") - add_compile_options("$<$:-idirafter${CUOPT_QUADMATH_INCLUDEDIR}>") - endif() - endif() -endif() +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libquadmath.a + OUTPUT_VARIABLE CUOPT_QUADMATH_LIB + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + if (IS_ABSOLUTE "${CUOPT_QUADMATH_LIB}") + get_filename_component(CUOPT_QUADMATH_LIBDIR "${CUOPT_QUADMATH_LIB}" DIRECTORY) + set(CUOPT_QUADMATH_INCLUDEDIR "${CUOPT_QUADMATH_LIBDIR}/include") + + if (EXISTS "${CUOPT_QUADMATH_INCLUDEDIR}/quadmath.h") + message(STATUS "Adding clang fallback include for quadmath: ${CUOPT_QUADMATH_INCLUDEDIR}") + add_compile_options("$<$:-idirafter${CUOPT_QUADMATH_INCLUDEDIR}>") + endif () + endif () +endif () # To use sanitizer with cuda runtime, one must follow a few steps: # 1. Run the binary with env var set: LD_PRELOAD="$(gcc -print-file-name=libasan.so)" ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0' # 2. 
(Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0 -if(BUILD_SANITIZER) - list(APPEND CUOPT_CXX_FLAGS -fsanitize=address,undefined -fno-omit-frame-pointer -g) - if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - list(APPEND CUOPT_CXX_FLAGS -Wno-error=maybe-uninitialized) - endif() - add_link_options(-fsanitize=address,undefined) -endif(BUILD_SANITIZER) +if (BUILD_SANITIZER) + list(APPEND CUOPT_CXX_FLAGS -fsanitize=address,undefined -fno-omit-frame-pointer -g) + if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + list(APPEND CUOPT_CXX_FLAGS -Wno-error=maybe-uninitialized) + endif () + add_link_options(-fsanitize=address,undefined) +endif (BUILD_SANITIZER) # To use ThreadSanitizer: # 1. Build with clang and the -tsan flag # 2. Run the binary with env var set: OMP_TOOL_LIBRARIES=/usr/lib/llvm-17/lib/libarcher.so ARCHER_OPTIONS='verbose=1' TSAN_OPTIONS='suppresions=cpp/utilities/tsan_suppressions.txt:ignore_noninstrumented_modules=1:halt_on_error=1' # Replace with local llvm install path. libarcher.so must be presetn -if(BUILD_TSAN) - message(STATUS "Building with ThreadSanitizer enabled") - list(APPEND CUOPT_CXX_FLAGS -fsanitize=thread -fno-omit-frame-pointer -g) - add_link_options(-fsanitize=thread) -endif(BUILD_TSAN) +if (BUILD_TSAN) + message(STATUS "Building with ThreadSanitizer enabled") + list(APPEND CUOPT_CXX_FLAGS -fsanitize=thread -fno-omit-frame-pointer -g) + add_link_options(-fsanitize=thread) +endif (BUILD_TSAN) # To use MemorySanitizer: # 1. Build with clang and the -msan flag (MemorySanitizer requires clang) # 2. 
Run the binary with env var set: MSAN_OPTIONS='halt_on_error=1' # Note: MemorySanitizer requires all code (including libraries) to be instrumented for accurate results -if(BUILD_MSAN) - message(STATUS "Building with MemorySanitizer enabled") - list(APPEND CUOPT_CXX_FLAGS -fsanitize=memory -fno-omit-frame-pointer -g -fsanitize-memory-track-origins=1) - add_link_options(-fsanitize=memory) -endif(BUILD_MSAN) +if (BUILD_MSAN) + message(STATUS "Building with MemorySanitizer enabled") + list(APPEND CUOPT_CXX_FLAGS -fsanitize=memory -fno-omit-frame-pointer -g -fsanitize-memory-track-origins=1) + add_link_options(-fsanitize=memory) +endif (BUILD_MSAN) # Note: -UNDEBUG is applied via CUOPT_CXX_FLAGS / CUOPT_CUDA_FLAGS (not add_definitions) # to avoid leaking into dependencies that are built in-tree. @@ -140,27 +140,27 @@ endif(BUILD_MSAN) # Keeping NDEBUG defined for gRPC files makes the header inline an empty Dtor(), # avoiding the missing symbol at runtime. Additionally, gRPC files are always # compiled with -DNDEBUG (see below) so Debug builds also avoid the missing symbol. -if(DEFINE_ASSERT) - add_definitions(-DASSERT_MODE) - list(APPEND CUOPT_CUDA_FLAGS -UNDEBUG) -endif(DEFINE_ASSERT) +if (DEFINE_ASSERT) + add_definitions(-DASSERT_MODE) + list(APPEND CUOPT_CUDA_FLAGS -UNDEBUG) +endif (DEFINE_ASSERT) -if(DEFINE_BENCHMARK) - add_definitions(-DBENCHMARK) -endif(DEFINE_BENCHMARK) +if (DEFINE_BENCHMARK) + add_definitions(-DBENCHMARK) +endif (DEFINE_BENCHMARK) -if(DEFINE_PDLP_VERBOSE_MODE) - add_definitions(-DPDLP_VERBOSE_MODE) -endif(DEFINE_PDLP_VERBOSE_MODE) +if (DEFINE_PDLP_VERBOSE_MODE) + add_definitions(-DPDLP_VERBOSE_MODE) +endif (DEFINE_PDLP_VERBOSE_MODE) # Set logging level set(LIBCUOPT_LOGGING_LEVEL - "INFO" - CACHE STRING "Choose the logging level." + "INFO" + CACHE STRING "Choose the logging level." 
) set_property( - CACHE LIBCUOPT_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" - "OFF") + CACHE LIBCUOPT_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" + "OFF") message(VERBOSE "CUOPT: LIBCUOPT_LOGGING_LEVEL = '${LIBCUOPT_LOGGING_LEVEL}'.") message("-- Building with logging level = ${LIBCUOPT_LOGGING_LEVEL}") @@ -170,51 +170,50 @@ message("-- Host target architecture = '${CMAKE_SYSTEM_PROCESSOR}'") # make the flags global in order to propagate flags to test cmake files set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --expt-extended-lambda") -if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.9) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -static-global-template-stub=false") -endif() +if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.9) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -static-global-template-stub=false") +endif () list(APPEND CUOPT_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xcompiler=-Werror --default-stream=per-thread) -if("${CMAKE_CUDA_HOST_COMPILER}" MATCHES "clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall) -else() - list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall -Wno-error=non-template-friend) -endif() +if ("${CMAKE_CUDA_HOST_COMPILER}" MATCHES "clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall) +else () + list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall -Wno-error=non-template-friend) +endif () list(APPEND CUOPT_CUDA_FLAGS -Xfatbin=-compress-all) -if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13.0) - list(APPEND CUOPT_CUDA_FLAGS -Xfatbin=--compress-level=3) -endif() +if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13.0) + list(APPEND CUOPT_CUDA_FLAGS -Xfatbin=--compress-level=3) +endif () list(APPEND 
CUOPT_CUDA_FLAGS -fopenmp) # Add jobserver flags for parallel compilation if PARALLEL_LEVEL is set -if(PARALLEL_LEVEL AND NOT "${PARALLEL_LEVEL}" STREQUAL "") - message(STATUS "Enabling nvcc parallel compilation support") - list(APPEND CUOPT_CUDA_FLAGS --threads=0 --split-compile=0) - if(USE_NVCC_JOBSERVER AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) - message(STATUS "Enabling nvcc jobserver support (NVCC >= 13.0)") - list(APPEND CUOPT_CUDA_FLAGS --jobserver) - endif() -endif() - -if(NOT DISABLE_OPENMP) - find_package(OpenMP) - - if(OPENMP_FOUND) +if (PARALLEL_LEVEL AND NOT "${PARALLEL_LEVEL}" STREQUAL "") + message(STATUS "Enabling nvcc parallel compilation support") + list(APPEND CUOPT_CUDA_FLAGS --threads=0 --split-compile=0) + if (USE_NVCC_JOBSERVER AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + message(STATUS "Enabling nvcc jobserver support (NVCC >= 13.0)") + list(APPEND CUOPT_CUDA_FLAGS --jobserver) + endif () +endif () + +find_package(OpenMP) + +if (OPENMP_FOUND) message(VERBOSE "cuOpt: OpenMP found in ${OpenMP_CXX_INCLUDE_DIRS}") - endif() -endif() +endif () + # Debug options -if(CMAKE_BUILD_TYPE MATCHES Debug) - message(STATUS "Building with debugging flags") - list(APPEND CUOPT_CUDA_FLAGS -G -Xcompiler=-rdynamic -O0) - -# Option to enable line info in CUDA device compilation to allow introspection when profiling / -# memchecking -elseif(CMAKE_CUDA_LINEINFO) - message(STATUS "Enabling line info") - list(APPEND CUOPT_CUDA_FLAGS -lineinfo) - set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -lineinfo") -endif(CMAKE_BUILD_TYPE MATCHES Debug) +if (CMAKE_BUILD_TYPE MATCHES Debug) + message(STATUS "Building with debugging flags") + list(APPEND CUOPT_CUDA_FLAGS -G -Xcompiler=-rdynamic -O0) + + # Option to enable line info in CUDA device compilation to allow introspection when profiling / + # memchecking +elseif (CMAKE_CUDA_LINEINFO) + message(STATUS "Enabling line info") + list(APPEND CUOPT_CUDA_FLAGS -lineinfo) + 
set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -lineinfo") +endif (CMAKE_BUILD_TYPE MATCHES Debug) # ################################################################################################## # - find CPM based dependencies ------------------------------------------------------------------ @@ -224,34 +223,34 @@ rapids_cmake_install_lib_dir(lib_dir) option(FETCH_RAPIDS "Fetch RAPIDS dependencies" ON) if (FETCH_RAPIDS) - include(cmake/thirdparty/get_cccl.cmake) - include(cmake/thirdparty/get_rmm.cmake) - include(cmake/thirdparty/get_raft.cmake) - # Source-built RMM can hide out-of-line utility symbols such as - # rmm::align_up / rmm::get_current_cuda_device when built with hidden visibility on clang. - # Force default visibility on the fetched rmm target until this is fixed upstream/figured out. - if(TARGET rmm) - set_target_properties(rmm PROPERTIES CXX_VISIBILITY_PRESET default) - endif() -else() - find_package(CCCL REQUIRED) - find_package(RMM REQUIRED) - find_package(RAFT REQUIRED) -endif() + include(cmake/thirdparty/get_cccl.cmake) + include(cmake/thirdparty/get_rmm.cmake) + include(cmake/thirdparty/get_raft.cmake) + # Source-built RMM can hide out-of-line utility symbols such as + # rmm::align_up / rmm::get_current_cuda_device when built with hidden visibility on clang. + # Force default visibility on the fetched rmm target until this is fixed upstream/figured out. + if (TARGET rmm) + set_target_properties(rmm PROPERTIES CXX_VISIBILITY_PRESET default) + endif () +else () + find_package(CCCL REQUIRED) + find_package(RMM REQUIRED) + find_package(RAFT REQUIRED) +endif () FetchContent_Declare( - papilo - GIT_REPOSITORY "https://github.com/scipopt/papilo.git" - # We would want to get the main branch. However, the main branch - # does not have some of the presolvers and settings that we need - # Mainly, probing and clique merging. - # This is the reason we are using the development branch - # from Oct 12, 2025. 
Once these changes are merged into the main branch, - #we can switch to the main branch. - GIT_TAG "741a2b9c8155b249d6df574d758b4d97d4417520" - GIT_PROGRESS TRUE - EXCLUDE_FROM_ALL - SYSTEM + papilo + GIT_REPOSITORY "https://github.com/scipopt/papilo.git" + # We would want to get the main branch. However, the main branch + # does not have some of the presolvers and settings that we need + # Mainly, probing and clique merging. + # This is the reason we are using the development branch + # from Oct 12, 2025. Once these changes are merged into the main branch, + #we can switch to the main branch. + GIT_TAG "741a2b9c8155b249d6df574d758b4d97d4417520" + GIT_PROGRESS TRUE + EXCLUDE_FROM_ALL + SYSTEM ) find_package(TBB REQUIRED) @@ -264,12 +263,12 @@ FetchContent_MakeAvailable(papilo) # PSLP - Lightweight C presolver for linear programs # https://github.com/dance858/PSLP FetchContent_Declare( - pslp - GIT_REPOSITORY "https://github.com/dance858/PSLP.git" - GIT_TAG "v0.0.8" - GIT_PROGRESS TRUE - EXCLUDE_FROM_ALL - SYSTEM + pslp + GIT_REPOSITORY "https://github.com/dance858/PSLP.git" + GIT_TAG "v0.0.8" + GIT_PROGRESS TRUE + EXCLUDE_FROM_ALL + SYSTEM ) # Build PSLP as static to embed in cuopt (avoids runtime library path issues) @@ -287,174 +286,180 @@ create_logger_macros(CUOPT "cuopt::default_logger()" include/cuopt) find_package(CUDSS REQUIRED) # ################################################################################################## -# - gRPC and Protobuf setup (REQUIRED) ------------------------------------------------------------ - -# gRPC is required for this branch - it provides remote execution features -# gRPC can come from either: -# - an installed CMake package (gRPCConfig.cmake), or -# - an in-tree build (e.g. python/libcuopt uses FetchContent(grpc), which defines gRPC::grpc++). 
- -if(NOT TARGET OpenSSL::SSL) - find_package(OpenSSL CONFIG QUIET) - if(NOT OpenSSL_FOUND AND NOT OPENSSL_FOUND) - find_package(OpenSSL REQUIRED) - endif() -endif() - -if(NOT TARGET gRPC::grpc++) - find_package(gRPC CONFIG REQUIRED) -endif() - -# Find Protobuf (should come with gRPC, but verify) -if(NOT TARGET protobuf::libprotobuf) - find_package(protobuf CONFIG REQUIRED) -endif() - -set(CUOPT_ENABLE_GRPC ON) -add_compile_definitions(CUOPT_ENABLE_GRPC) -message(STATUS "gRPC enabled (target gRPC::grpc++ is available)") - -# Find protoc compiler (provided by config package or target) -if(TARGET protobuf::protoc) - get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION_RELEASE) - if(NOT _PROTOBUF_PROTOC) - get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION) - endif() -else() - find_package(protobuf CONFIG REQUIRED) - get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION_RELEASE) - if(NOT _PROTOBUF_PROTOC) - get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION) - endif() -endif() - -if(NOT _PROTOBUF_PROTOC) - message(FATAL_ERROR "protoc not found (Protobuf_PROTOC_EXECUTABLE is empty)") -endif() - -# Find grpc_cpp_plugin -if(TARGET grpc_cpp_plugin) - set(_GRPC_CPP_PLUGIN_EXECUTABLE "$") -else() - find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) - if(NOT _GRPC_CPP_PLUGIN_EXECUTABLE) - message(FATAL_ERROR "grpc_cpp_plugin not found") - endif() -endif() - -# Generate C++ code from cuopt_remote.proto (base message definitions) -set(PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc/cuopt_remote.proto") -set(PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.cc") -set(PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.h") - -add_custom_command( - OUTPUT "${PROTO_SRCS}" "${PROTO_HDRS}" - COMMAND ${_PROTOBUF_PROTOC} - ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} - --proto_path ${CMAKE_CURRENT_SOURCE_DIR}/src/grpc - ${PROTO_FILE} - DEPENDS ${PROTO_FILE} - COMMENT "Generating C++ code 
from cuopt_remote.proto" - VERBATIM -) - -# Generate gRPC service code from cuopt_remote_service.proto -set(GRPC_PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc/cuopt_remote_service.proto") -set(GRPC_PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.pb.cc") -set(GRPC_PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.pb.h") -set(GRPC_SERVICE_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.grpc.pb.cc") -set(GRPC_SERVICE_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.grpc.pb.h") - -add_custom_command( - OUTPUT "${GRPC_PROTO_SRCS}" "${GRPC_PROTO_HDRS}" "${GRPC_SERVICE_SRCS}" "${GRPC_SERVICE_HDRS}" - COMMAND ${_PROTOBUF_PROTOC} - ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} - --grpc_out ${CMAKE_CURRENT_BINARY_DIR} - --plugin=protoc-gen-grpc=${_GRPC_CPP_PLUGIN_EXECUTABLE} - --proto_path ${CMAKE_CURRENT_SOURCE_DIR}/src/grpc - ${GRPC_PROTO_FILE} - DEPENDS ${GRPC_PROTO_FILE} ${PROTO_FILE} - COMMENT "Generating gRPC C++ code from cuopt_remote_service.proto" - VERBATIM -) - -message(STATUS "gRPC protobuf code generation configured") - -if(BUILD_TESTS) - include(cmake/thirdparty/get_gtest.cmake) -endif() - -set(CUOPT_SRC_FILES ) +# - gRPC and Protobuf setup ----------------------------------------------------------------------- + +if (NOT SKIP_GRPC_BUILD) + # gRPC can come from either: + # - an installed CMake package (gRPCConfig.cmake), or + # - an in-tree build (e.g. python/libcuopt uses FetchContent(grpc), which defines gRPC::grpc++). 
+ + if (NOT TARGET OpenSSL::SSL) + find_package(OpenSSL CONFIG QUIET) + if (NOT OpenSSL_FOUND AND NOT OPENSSL_FOUND) + find_package(OpenSSL REQUIRED) + endif () + endif () + + if (NOT TARGET gRPC::grpc++) + find_package(gRPC CONFIG REQUIRED) + endif () + + # Find Protobuf (should come with gRPC, but verify) + if (NOT TARGET protobuf::libprotobuf) + find_package(protobuf CONFIG REQUIRED) + endif () + + set(CUOPT_ENABLE_GRPC ON) + add_compile_definitions(CUOPT_ENABLE_GRPC) + message(STATUS "gRPC enabled (target gRPC::grpc++ is available)") + + # Find protoc compiler (provided by config package or target) + if (TARGET protobuf::protoc) + get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION_RELEASE) + if (NOT _PROTOBUF_PROTOC) + get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION) + endif () + else () + find_package(protobuf CONFIG REQUIRED) + get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION_RELEASE) + if (NOT _PROTOBUF_PROTOC) + get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION) + endif () + endif () + + if (NOT _PROTOBUF_PROTOC) + message(FATAL_ERROR "protoc not found (Protobuf_PROTOC_EXECUTABLE is empty)") + endif () + + # Find grpc_cpp_plugin + if (TARGET grpc_cpp_plugin) + set(_GRPC_CPP_PLUGIN_EXECUTABLE "$") + else () + find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) + if (NOT _GRPC_CPP_PLUGIN_EXECUTABLE) + message(FATAL_ERROR "grpc_cpp_plugin not found") + endif () + endif () + + # Generate C++ code from cuopt_remote.proto (base message definitions) + set(PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc/cuopt_remote.proto") + set(PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.cc") + set(PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.h") + + add_custom_command( + OUTPUT "${PROTO_SRCS}" "${PROTO_HDRS}" + COMMAND ${_PROTOBUF_PROTOC} + ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} + --proto_path ${CMAKE_CURRENT_SOURCE_DIR}/src/grpc + ${PROTO_FILE} + DEPENDS 
${PROTO_FILE} + COMMENT "Generating C++ code from cuopt_remote.proto" + VERBATIM + ) + + # Generate gRPC service code from cuopt_remote_service.proto + set(GRPC_PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc/cuopt_remote_service.proto") + set(GRPC_PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.pb.cc") + set(GRPC_PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.pb.h") + set(GRPC_SERVICE_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.grpc.pb.cc") + set(GRPC_SERVICE_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.grpc.pb.h") + + add_custom_command( + OUTPUT "${GRPC_PROTO_SRCS}" "${GRPC_PROTO_HDRS}" "${GRPC_SERVICE_SRCS}" "${GRPC_SERVICE_HDRS}" + COMMAND ${_PROTOBUF_PROTOC} + ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} + --grpc_out ${CMAKE_CURRENT_BINARY_DIR} + --plugin=protoc-gen-grpc=${_GRPC_CPP_PLUGIN_EXECUTABLE} + --proto_path ${CMAKE_CURRENT_SOURCE_DIR}/src/grpc + ${GRPC_PROTO_FILE} + DEPENDS ${GRPC_PROTO_FILE} ${PROTO_FILE} + COMMENT "Generating gRPC C++ code from cuopt_remote_service.proto" + VERBATIM + ) + + message(STATUS "gRPC protobuf code generation configured") + +else () + message(STATUS "gRPC disabled") +endif () + +if (BUILD_TESTS) + include(cmake/thirdparty/get_gtest.cmake) +endif () + +set(CUOPT_SRC_FILES) add_subdirectory(src) if (HOST_LINEINFO) - set_source_files_properties(${CUOPT_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1") -endif() + set_source_files_properties(${CUOPT_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1") +endif () # Apply -UNDEBUG only to solver source files (not gRPC infrastructure). # Must happen before gRPC files are appended to CUOPT_SRC_FILES. # Uses APPEND to preserve any existing per-file options (e.g. -g1 from HOST_LINEINFO). 
-if(DEFINE_ASSERT) - set_property(SOURCE ${CUOPT_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} - APPEND PROPERTY COMPILE_OPTIONS "-UNDEBUG") -endif() - -# Add gRPC mapper files and generated protobuf sources -set(GRPC_INFRA_FILES - ${PROTO_SRCS} - ${GRPC_PROTO_SRCS} - ${GRPC_SERVICE_SRCS} - src/grpc/grpc_problem_mapper.cpp - src/grpc/grpc_solution_mapper.cpp - src/grpc/grpc_settings_mapper.cpp - src/grpc/grpc_service_mapper.cpp - src/grpc/client/grpc_client.cpp - src/grpc/client/solve_remote.cpp -) -list(APPEND CUOPT_SRC_FILES ${GRPC_INFRA_FILES}) - -# Always keep NDEBUG defined for gRPC infrastructure files so that abseil -# headers inline Mutex::Dtor() instead of emitting an external call. -# The conda-forge abseil shared library is built with NDEBUG and does not -# export that symbol (abseil-cpp#1624). Without this, Debug builds fail -# at runtime with "undefined symbol: absl::…::Mutex::Dtor". -set_property(SOURCE ${GRPC_INFRA_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} - APPEND PROPERTY COMPILE_OPTIONS "-DNDEBUG") +if (DEFINE_ASSERT) + set_property(SOURCE ${CUOPT_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} + APPEND PROPERTY COMPILE_OPTIONS "-UNDEBUG") +endif () + +if (NOT SKIP_GRPC_BUILD) + # Add gRPC mapper files and generated protobuf sources + set(GRPC_INFRA_FILES + ${PROTO_SRCS} + ${GRPC_PROTO_SRCS} + ${GRPC_SERVICE_SRCS} + src/grpc/grpc_problem_mapper.cpp + src/grpc/grpc_solution_mapper.cpp + src/grpc/grpc_settings_mapper.cpp + src/grpc/grpc_service_mapper.cpp + src/grpc/client/grpc_client.cpp + src/grpc/client/solve_remote.cpp + ) + list(APPEND CUOPT_SRC_FILES ${GRPC_INFRA_FILES}) + + # Always keep NDEBUG defined for gRPC infrastructure files so that abseil + # headers inline Mutex::Dtor() instead of emitting an external call. + # The conda-forge abseil shared library is built with NDEBUG and does not + # export that symbol (abseil-cpp#1624). Without this, Debug builds fail + # at runtime with "undefined symbol: absl::…::Mutex::Dtor". 
+ set_property(SOURCE ${GRPC_INFRA_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} + APPEND PROPERTY COMPILE_OPTIONS "-DNDEBUG") +endif (NOT SKIP_GRPC_BUILD) add_library(cuopt SHARED - ${CUOPT_SRC_FILES} + ${CUOPT_SRC_FILES} ) set_target_properties(cuopt - PROPERTIES BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" - - # set target compile options - CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 20 - CUDA_STANDARD_REQUIRED ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - CXX_SCAN_FOR_MODULES OFF + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + + # set target compile options + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 20 + CUDA_STANDARD_REQUIRED ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + CXX_SCAN_FOR_MODULES OFF ) target_compile_definitions(cuopt PUBLIC "CUOPT_LOG_ACTIVE_LEVEL=RAPIDS_LOGGER_LOG_LEVEL_${LIBCUOPT_LOGGING_LEVEL}") target_compile_options(cuopt - PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" - "$<$:${CUOPT_CUDA_FLAGS}>" + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + "$<$:${CUOPT_CUDA_FLAGS}>" ) -if(WRITE_FATBIN) - file(WRITE "${CUOPT_BINARY_DIR}/fatbin.ld" - [=[ +if (WRITE_FATBIN) + file(WRITE "${CUOPT_BINARY_DIR}/fatbin.ld" + [=[ SECTIONS { .nvFatBinSegment : { *(.nvFatBinSegment) } .nv_fatbin : { *(.nv_fatbin) } } ]=]) - target_link_options(cuopt PRIVATE "${CUOPT_BINARY_DIR}/fatbin.ld") -endif() + target_link_options(cuopt PRIVATE "${CUOPT_BINARY_DIR}/fatbin.ld") +endif () add_library(cuopt::cuopt ALIAS cuopt) # ################################################################################################## @@ -463,29 +468,29 @@ message(STATUS "target include directories CUDSS_INCLUDES = ${CUDSS_INCLUDE}") # Adding Papilo as a system include messes up clang's include resolution if papilo is already installed as a conda package target_include_directories(cuopt PRIVATE - "${papilo_SOURCE_DIR}/src" - "${papilo_BINARY_DIR}" + "${papilo_SOURCE_DIR}/src" + "${papilo_BINARY_DIR}" ) target_include_directories(cuopt SYSTEM PRIVATE - 
"${pslp_SOURCE_DIR}/include" + "${pslp_SOURCE_DIR}/include" ) target_include_directories(cuopt - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" - "${CMAKE_CURRENT_SOURCE_DIR}/src" - "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc" - "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc/client" - "${CMAKE_CURRENT_BINARY_DIR}" - "${CUDSS_INCLUDE}" - PUBLIC - "$" - "$" - "$" - INTERFACE - "$" - ${CUDSS_INCLUDE} + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc" + "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc/client" + "${CMAKE_CURRENT_BINARY_DIR}" + "${CUDSS_INCLUDE}" + PUBLIC + "$" + "$" + "$" + INTERFACE + "$" + ${CUDSS_INCLUDE} ) # Link PSLP by file to avoid export dependency tracking @@ -496,10 +501,10 @@ add_dependencies(cuopt PSLP) # - link libraries -------------------------------------------------------------------------------- set(CUOPT_PRIVATE_CUDA_LIBS - CUDA::curand - CUDA::cusolver - TBB::tbb - OpenMP::OpenMP_CXX) + CUDA::curand + CUDA::cusolver + TBB::tbb + OpenMP::OpenMP_CXX) list(PREPEND CUOPT_PRIVATE_CUDA_LIBS CUDA::cublasLt) @@ -512,19 +517,19 @@ get_filename_component(CUDSS_MT_LIB_FILE_NAME "${CUDSS_MT_LIB_FILE}" NAME) target_compile_definitions(cuopt PRIVATE CUDSS_MT_LIB_FILE_NAME="${CUDSS_MT_LIB_FILE_NAME}") execute_process( - COMMAND git rev-parse --short HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMIT_HASH - OUTPUT_STRIP_TRAILING_WHITESPACE + COMMAND git rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE ) message("-- Building with GIT_COMMIT_HASH = '${GIT_COMMIT_HASH}'") # Generate build_info.hpp from template # configure_file() only updates the output if content changes, avoiding unnecessary rebuilds configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/src/utilities/build_info.hpp.in - ${CMAKE_CURRENT_BINARY_DIR}/include/utilities/build_info.hpp - @ONLY + 
${CMAKE_CURRENT_SOURCE_DIR}/src/utilities/build_info.hpp.in + ${CMAKE_CURRENT_BINARY_DIR}/include/utilities/build_info.hpp + @ONLY ) # Add the generated include directory @@ -532,32 +537,32 @@ target_include_directories(cuopt PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include) list(JOIN CMAKE_CUDA_ARCHITECTURES "," JOINED_CUDA_ARCHITECTURES) target_compile_definitions(cuopt PUBLIC - CUOPT_CUDA_ARCHITECTURES="${JOINED_CUDA_ARCHITECTURES}" - CUOPT_CPU_ARCHITECTURE="${CMAKE_SYSTEM_PROCESSOR}") + CUOPT_CUDA_ARCHITECTURES="${JOINED_CUDA_ARCHITECTURES}" + CUOPT_CPU_ARCHITECTURE="${CMAKE_SYSTEM_PROCESSOR}") target_link_libraries(cuopt - PUBLIC - CUDA::cublas - CUDA::cusparse - rmm::rmm - rapids_logger::rapids_logger - CCCL::CCCL - raft::raft - cuopt::mps_parser - ${CUDSS_LIB_FILE} - PRIVATE - ${CUOPT_PRIVATE_CUDA_LIBS} - protobuf::libprotobuf - gRPC::grpc++ - ) + PUBLIC + CUDA::cublas + CUDA::cusparse + rmm::rmm + rapids_logger::rapids_logger + CCCL::CCCL + raft::raft + cuopt::mps_parser + ${CUDSS_LIB_FILE} + PRIVATE + ${CUOPT_PRIVATE_CUDA_LIBS} + $<$:protobuf::libprotobuf> + $<$:gRPC::grpc++> +) # ################################################################################################## # - generate tests -------------------------------------------------------------------------------- -if(BUILD_TESTS) - include(CTest) - add_subdirectory(tests) -endif(BUILD_TESTS) +if (BUILD_TESTS) + include(CTest) + add_subdirectory(tests) +endif (BUILD_TESTS) # ################################################################################################## # - install targets ------------------------------------------------------------------------------- @@ -568,46 +573,46 @@ set(CPACK_COMPONENTS_ALL runtime dev) set(CPACK_PACKAGING_INSTALL_PREFIX "/usr/local") #If using cpack to create a deb package -if(CPACK_GENERATOR STREQUAL "DEB") - set(_BIN_DEST "bin") - set(_LIB_DEST "lib") - set(_INCLUDE_DEST "lib/cuopt") - -#If building locally use the Default install paths(e.g. 
for local development or other package types) -else() - set(_BIN_DEST "${CMAKE_INSTALL_BINDIR}") - set(_LIB_DEST "${lib_dir}") - set(_INCLUDE_DEST include/cuopt/) -endif() +if (CPACK_GENERATOR STREQUAL "DEB") + set(_BIN_DEST "bin") + set(_LIB_DEST "lib") + set(_INCLUDE_DEST "lib/cuopt") + + #If building locally use the Default install paths(e.g. for local development or other package types) +else () + set(_BIN_DEST "${CMAKE_INSTALL_BINDIR}") + set(_LIB_DEST "${lib_dir}") + set(_INCLUDE_DEST include/cuopt/) +endif () # adds the .so files to the runtime deb package install(TARGETS cuopt mps_parser - DESTINATION ${_LIB_DEST} - COMPONENT runtime - EXPORT cuopt-exports + DESTINATION ${_LIB_DEST} + COMPONENT runtime + EXPORT cuopt-exports ) # adds the .so files to the development deb package install(TARGETS cuopt mps_parser - DESTINATION ${_LIB_DEST} - COMPONENT dev + DESTINATION ${_LIB_DEST} + COMPONENT dev ) # adds the header files to the development deb package install(DIRECTORY include/cuopt/ - DESTINATION ${_INCLUDE_DEST} - COMPONENT dev + DESTINATION ${_INCLUDE_DEST} + COMPONENT dev ) # adds the version header file to the development deb package install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuopt/version_config.hpp - DESTINATION ${_INCLUDE_DEST} - COMPONENT dev + DESTINATION ${_INCLUDE_DEST} + COMPONENT dev ) # ############################################################################################### # - install export ------------------------------------------------------------------------------- set(doc_string - [=[ + [=[ Provide targets for cuOpt. cuOpt library is a collection of GPU accelerated combinatorial optimization algorithms. 
@@ -615,19 +620,19 @@ cuOpt library is a collection of GPU accelerated combinatorial optimization algo ]=]) rapids_export(INSTALL cuopt - EXPORT_SET cuopt-exports - GLOBAL_TARGETS cuopt - NAMESPACE cuopt:: - DOCUMENTATION doc_string + EXPORT_SET cuopt-exports + GLOBAL_TARGETS cuopt + NAMESPACE cuopt:: + DOCUMENTATION doc_string ) # ############################################################################################### # - build export ------------------------------------------------------------------------------- rapids_export(BUILD cuopt - EXPORT_SET cuopt-exports - GLOBAL_TARGETS cuopt - NAMESPACE cuopt:: - DOCUMENTATION doc_string + EXPORT_SET cuopt-exports + GLOBAL_TARGETS cuopt + NAMESPACE cuopt:: + DOCUMENTATION doc_string ) # ################################################################################################## @@ -638,206 +643,208 @@ rapids_export(BUILD cuopt # doc targets for cuOpt find_package(Doxygen) -if(Doxygen_FOUND) - add_custom_command(OUTPUT CUOPT_DOXYGEN - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doxygen - COMMAND doxygen Doxyfile - VERBATIM) +if (Doxygen_FOUND) + add_custom_command(OUTPUT CUOPT_DOXYGEN + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doxygen + COMMAND doxygen Doxyfile + VERBATIM) - add_custom_target(docs_cuopt DEPENDS CUOPT_DOXYGEN) -endif() + add_custom_target(docs_cuopt DEPENDS CUOPT_DOXYGEN) +endif () rapids_cpm_find( - argparse 3.2.0 - GLOBAL_TARGETS argparse::argparse - CPM_ARGS - GIT_REPOSITORY https://github.com/p-ranav/argparse.git - GIT_TAG v3.2 - GIT_SHALLOW TRUE -) - -if(NOT BUILD_LP_ONLY) -add_executable(cuopt_cli cuopt_cli.cpp) - -# PIE executable: auditwheel/patchelf expands .dynstr/RPATH when repairing wheels; non-PIE -# (ET_EXEC) binaries are prone to corrupt segment layout. PIE (ET_DYN) survives RPATH edits. 
-set_target_properties(cuopt_cli - PROPERTIES - CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CXX_SCAN_FOR_MODULES OFF - POSITION_INDEPENDENT_CODE ON -) - -target_compile_options(cuopt_cli - PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" - "$<$:${CUOPT_CUDA_FLAGS}>" -) - -target_link_options(cuopt_cli PRIVATE -pie) - -target_include_directories(cuopt_cli - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/src" - PUBLIC - "$" - "$" - ${CUDSS_INCLUDE} - "$" -) - -target_link_libraries(cuopt_cli - PUBLIC - cuopt - OpenMP::OpenMP_CXX - ${CUDSS_LIBRARIES} - TBB::tbb - PRIVATE - argparse::argparse -) - # Use RUNPATH when building locally in order to allow LD_LIBRARY_PATH to override the conda env path -if(NOT DEFINED INSTALL_TARGET OR "${INSTALL_TARGET}" STREQUAL "") - target_link_options(cuopt_cli PRIVATE -Wl,--enable-new-dtags) -endif() -set_property(TARGET cuopt_cli PROPERTY INSTALL_RPATH "$ORIGIN/../${lib_dir}") - -# adds the cuopt_cli executable to the runtime deb package -install(TARGETS cuopt_cli - COMPONENT runtime - RUNTIME DESTINATION ${_BIN_DEST} -) -endif() + argparse 3.2.0 + GLOBAL_TARGETS argparse::argparse + CPM_ARGS + GIT_REPOSITORY https://github.com/p-ranav/argparse.git + GIT_TAG v3.2 + GIT_SHALLOW TRUE +) + +if (NOT BUILD_LP_ONLY) + add_executable(cuopt_cli cuopt_cli.cpp) + + # PIE executable: auditwheel/patchelf expands .dynstr/RPATH when repairing wheels; non-PIE + # (ET_EXEC) binaries are prone to corrupt segment layout. PIE (ET_DYN) survives RPATH edits. 
+ set_target_properties(cuopt_cli + PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CXX_SCAN_FOR_MODULES OFF + POSITION_INDEPENDENT_CODE ON + ) + + target_compile_options(cuopt_cli + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + "$<$:${CUOPT_CUDA_FLAGS}>" + ) + + target_link_options(cuopt_cli PRIVATE -pie) + + target_include_directories(cuopt_cli + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + "$" + ${CUDSS_INCLUDE} + "$" + ) + + target_link_libraries(cuopt_cli + PUBLIC + cuopt + OpenMP::OpenMP_CXX + ${CUDSS_LIBRARIES} + TBB::tbb + PRIVATE + argparse::argparse + ) + # Use RUNPATH when building locally in order to allow LD_LIBRARY_PATH to override the conda env path + if (NOT DEFINED INSTALL_TARGET OR "${INSTALL_TARGET}" STREQUAL "") + target_link_options(cuopt_cli PRIVATE -Wl,--enable-new-dtags) + endif () + set_property(TARGET cuopt_cli PROPERTY INSTALL_RPATH "$ORIGIN/../${lib_dir}") + + # adds the cuopt_cli executable to the runtime deb package + install(TARGETS cuopt_cli + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} + ) +endif () option(BUILD_MIP_BENCHMARKS "Build MIP benchmarks" OFF) -if(BUILD_MIP_BENCHMARKS AND NOT BUILD_LP_ONLY) - add_executable(solve_MIP ../benchmarks/linear_programming/cuopt/run_mip.cpp) - target_include_directories(solve_MIP - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/src" - PUBLIC - "$" - ) - - set_target_properties(solve_MIP - PROPERTIES - CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CXX_SCAN_FOR_MODULES OFF - ) - - target_compile_options(solve_MIP - PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" - "$<$:${CUOPT_CUDA_FLAGS}>" - ) - target_link_libraries(solve_MIP - PUBLIC - cuopt - OpenMP::OpenMP_CXX - PRIVATE - ) - if(NOT DEFINED INSTALL_TARGET OR "${INSTALL_TARGET}" STREQUAL "") - target_link_options(solve_MIP PRIVATE -Wl,--enable-new-dtags) - endif() - - target_include_directories(solve_MIP - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/src" - ) - -endif() +if (BUILD_MIP_BENCHMARKS AND NOT BUILD_LP_ONLY) + add_executable(solve_MIP 
../benchmarks/linear_programming/cuopt/run_mip.cpp) + target_include_directories(solve_MIP + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + ) + + set_target_properties(solve_MIP + PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CXX_SCAN_FOR_MODULES OFF + ) + + target_compile_options(solve_MIP + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + "$<$:${CUOPT_CUDA_FLAGS}>" + ) + target_link_libraries(solve_MIP + PUBLIC + cuopt + OpenMP::OpenMP_CXX + PRIVATE + ) + if (NOT DEFINED INSTALL_TARGET OR "${INSTALL_TARGET}" STREQUAL "") + target_link_options(solve_MIP PRIVATE -Wl,--enable-new-dtags) + endif () + + target_include_directories(solve_MIP + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + ) + +endif () option(BUILD_LP_BENCHMARKS "Build LP benchmarks" OFF) -if(BUILD_LP_BENCHMARKS) - add_executable(solve_LP ../benchmarks/linear_programming/cuopt/run_pdlp.cu) - - set_target_properties(solve_LP - PROPERTIES - CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 20 - CUDA_STANDARD_REQUIRED ON - CXX_SCAN_FOR_MODULES OFF - ) - - target_compile_options(solve_LP - PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" - "$<$:${CUOPT_CUDA_FLAGS}>" - ) - target_link_libraries(solve_LP - PUBLIC - cuopt - OpenMP::OpenMP_CXX - PRIVATE - ) - if(NOT DEFINED INSTALL_TARGET OR "${INSTALL_TARGET}" STREQUAL "") - target_link_options(solve_LP PRIVATE -Wl,--enable-new-dtags) - endif() -endif() +if (BUILD_LP_BENCHMARKS) + add_executable(solve_LP ../benchmarks/linear_programming/cuopt/run_pdlp.cu) + + set_target_properties(solve_LP + PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 20 + CUDA_STANDARD_REQUIRED ON + CXX_SCAN_FOR_MODULES OFF + ) + + target_compile_options(solve_LP + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + "$<$:${CUOPT_CUDA_FLAGS}>" + ) + target_link_libraries(solve_LP + PUBLIC + cuopt + OpenMP::OpenMP_CXX + PRIVATE + ) + if (NOT DEFINED INSTALL_TARGET OR "${INSTALL_TARGET}" STREQUAL "") + target_link_options(solve_LP PRIVATE -Wl,--enable-new-dtags) + endif () 
+endif () # ################################################################################################## # - cuopt_grpc_server - gRPC-based remote server -------------------------------------------------- -add_executable(cuopt_grpc_server - src/grpc/server/grpc_server_main.cpp - src/grpc/server/grpc_server_logger.cpp - src/grpc/server/grpc_worker.cpp - src/grpc/server/grpc_worker_infra.cpp - src/grpc/server/grpc_server_threads.cpp - src/grpc/server/grpc_pipe_io.cpp - src/grpc/server/grpc_job_management.cpp - src/grpc/server/grpc_service_impl.cpp -) - -set_target_properties(cuopt_grpc_server - PROPERTIES - CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON - CXX_SCAN_FOR_MODULES OFF -) - -target_compile_options(cuopt_grpc_server - PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" -) - -target_include_directories(cuopt_grpc_server - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/src" - "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc" - "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc/server" - "${CMAKE_CURRENT_SOURCE_DIR}/include" - "${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" - "${CMAKE_CURRENT_BINARY_DIR}" - PUBLIC - "$" - "$" -) - -find_library(UUID_LIBRARY uuid REQUIRED) - -target_link_libraries(cuopt_grpc_server - PUBLIC - cuopt - OpenMP::OpenMP_CXX - PRIVATE - protobuf::libprotobuf - gRPC::grpc++ - ${UUID_LIBRARY} - argparse::argparse -) - -# Use RUNPATH when building locally -target_link_options(cuopt_grpc_server PRIVATE -Wl,--enable-new-dtags) -set_property(TARGET cuopt_grpc_server PROPERTY INSTALL_RPATH "$ORIGIN/../${lib_dir}") - -# Install the grpc server executable -install(TARGETS cuopt_grpc_server - COMPONENT runtime - RUNTIME DESTINATION ${_BIN_DEST} -) - -message(STATUS "Building cuopt_grpc_server (gRPC-based remote solve server)") +if (NOT SKIP_GRPC_BUILD) + add_executable(cuopt_grpc_server + src/grpc/server/grpc_server_main.cpp + src/grpc/server/grpc_server_logger.cpp + src/grpc/server/grpc_worker.cpp + src/grpc/server/grpc_worker_infra.cpp + src/grpc/server/grpc_server_threads.cpp + 
src/grpc/server/grpc_pipe_io.cpp + src/grpc/server/grpc_job_management.cpp + src/grpc/server/grpc_service_impl.cpp + ) + + set_target_properties(cuopt_grpc_server + PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CXX_SCAN_FOR_MODULES OFF + ) + + target_compile_options(cuopt_grpc_server + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + ) + + target_include_directories(cuopt_grpc_server + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc" + "${CMAKE_CURRENT_SOURCE_DIR}/src/grpc/server" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" + "${CMAKE_CURRENT_BINARY_DIR}" + PUBLIC + "$" + "$" + ) + + find_library(UUID_LIBRARY uuid REQUIRED) + + target_link_libraries(cuopt_grpc_server + PUBLIC + cuopt + OpenMP::OpenMP_CXX + PRIVATE + protobuf::libprotobuf + gRPC::grpc++ + ${UUID_LIBRARY} + argparse::argparse + ) + + # Use RUNPATH when building locally + target_link_options(cuopt_grpc_server PRIVATE -Wl,--enable-new-dtags) + set_property(TARGET cuopt_grpc_server PROPERTY INSTALL_RPATH "$ORIGIN/../${lib_dir}") + + # Install the grpc server executable + install(TARGETS cuopt_grpc_server + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} + ) + + message(STATUS "Building cuopt_grpc_server (gRPC-based remote solve server)") +endif (NOT SKIP_GRPC_BUILD) # ################################################################################################## # - CPack has to be the last item in the cmake file------------------------------------------------- diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index be01516657..bcb50b3235 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -624,6 +624,7 @@ std::unique_ptr> solve_mip( try { // Check if remote execution is enabled (always uses CPU backend) +#ifdef CUOPT_ENABLE_GRPC if (is_remote_execution_enabled()) { auto* cpu_prob = dynamic_cast*>(problem_interface); cuopt_expects(cpu_prob != 
nullptr, @@ -631,6 +632,7 @@ std::unique_ptr> solve_mip( "Remote execution requires CPU memory backend"); return solve_mip_remote(*cpu_prob, settings); } +#endif // Local execution - dispatch to appropriate overload based on problem type auto* cpu_prob = dynamic_cast*>(problem_interface); diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 29a7f32db6..65d40a5fa1 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -1742,6 +1742,7 @@ std::unique_ptr> solve_lp( "problem_interface cannot be null"); // Check if remote execution is enabled (always uses CPU backend) +#ifdef CUOPT_ENABLE_GRPC if (is_remote_execution_enabled()) { cuopt_expects(!is_batch_mode, error_type_t::ValidationError, @@ -1753,6 +1754,7 @@ std::unique_ptr> solve_lp( "Remote execution requires CPU memory backend"); return solve_lp_remote(*cpu_prob, settings); } +#endif // Local execution - dispatch to appropriate overload based on problem type auto* cpu_prob = dynamic_cast*>(problem_interface); From d14bea8b95378e38ec7511a97570fae07623ce78 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 21 Apr 2026 17:21:18 +0200 Subject: [PATCH 32/53] fixed deadlock Signed-off-by: Nicolas L. Guidotti --- cpp/CMakeLists.txt | 10 +++------- cpp/src/branch_and_bound/branch_and_bound.cpp | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3b4e085e8b..62ee196adc 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -113,7 +113,7 @@ endif (BUILD_SANITIZER) # To use ThreadSanitizer: # 1. Build with clang and the -tsan flag -# 2. Run the binary with env var set: OMP_TOOL_LIBRARIES=/usr/lib/llvm-17/lib/libarcher.so ARCHER_OPTIONS='verbose=1' TSAN_OPTIONS='suppresions=cpp/utilities/tsan_suppressions.txt:ignore_noninstrumented_modules=1:halt_on_error=1' +# 2. 
Run the binary with env var set: OMP_TOOL_LIBRARIES=/usr/lib/llvm-17/lib/libarcher.so ARCHER_OPTIONS='verbose=1' TSAN_OPTIONS='suppressions=cpp/utilities/tsan_suppressions.txt:ignore_noninstrumented_modules=1:halt_on_error=1' # Replace with local llvm install path. libarcher.so must be presetn if (BUILD_TSAN) message(STATUS "Building with ThreadSanitizer enabled") @@ -195,12 +195,8 @@ if (PARALLEL_LEVEL AND NOT "${PARALLEL_LEVEL}" STREQUAL "") endif () endif () -find_package(OpenMP) - -if (OPENMP_FOUND) - message(VERBOSE "cuOpt: OpenMP found in ${OpenMP_CXX_INCLUDE_DIRS}") -endif () - +find_package(OpenMP REQUIRED) +message(VERBOSE "cuOpt: OpenMP found in ${OpenMP_CXX_INCLUDE_DIRS}") # Debug options if (CMAKE_BUILD_TYPE MATCHES Debug) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index fb3f6e2227..a65c37d841 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1821,7 +1821,7 @@ template void branch_and_bound_t::single_threaded_solve() { bfs_worker_pool_.init(1, original_lp_, Arow_, var_types_, settings_); - bfs_worker_t* worker = bfs_worker_pool_[0]; + bfs_worker_t* worker = bfs_worker_pool_.pop_idle_worker(); node_queue_t& node_queue = worker->node_queue; node_queue.push(search_tree_.root.get_down_child()); From 978fefd5c634487bbebe2bcbffcde4ab9a460fb8 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 22 Apr 2026 11:36:28 +0200 Subject: [PATCH 33/53] added asserts to ensure correctness Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 54 +++++++++++-------- cpp/src/branch_and_bound/node_queue.hpp | 9 ++++ cpp/src/branch_and_bound/pseudo_costs.cpp | 3 +- cpp/src/branch_and_bound/worker.hpp | 9 ++++ cpp/src/branch_and_bound/worker_pool.hpp | 25 ++++++++- cpp/src/utilities/circular_deque.hpp | 2 + 6 files changed, 77 insertions(+), 25 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index a65c37d841..a395012d03 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -834,7 +834,7 @@ branch_variable_t branch_and_bound_t::variable_selection( std::vector& solution = worker->leaf_solution.x; switch (worker->search_strategy) { - case search_strategy_t::BEST_FIRST: + case BEST_FIRST: if (settings_.reliability_branching != 0) { branch_var = pc_.reliable_variable_selection(node_ptr, @@ -856,17 +856,18 @@ branch_variable_t branch_and_bound_t::variable_selection( return {branch_var, round_dir}; - case search_strategy_t::COEFFICIENT_DIVING: + case COEFFICIENT_DIVING: return coefficient_diving( original_lp_, fractional, solution, var_up_locks_, var_down_locks_, log); - case search_strategy_t::LINE_SEARCH_DIVING: + case LINE_SEARCH_DIVING: return line_search_diving(fractional, solution, root_relax_soln_.x, log); - case search_strategy_t::PSEUDOCOST_DIVING: + case PSEUDOCOST_DIVING: return pseudocost_diving(pc_, fractional, solution, root_relax_soln_.x, log); - case search_strategy_t::GUIDED_DIVING: + case GUIDED_DIVING: + assert(incumbent_.has_incumbent); mutex_upper_.lock(); current_incumbent = incumbent_.x; mutex_upper_.unlock(); @@ -1442,6 +1443,9 @@ template void branch_and_bound_t::plunge_with(bfs_worker_t* worker, mip_node_t* start_node) { + assert(worker != nullptr && worker->is_active.load()); + assert(start_node != nullptr); + // Stack holds at most 2 entries: the preferred child + its sibling. 
// The sibling is evicted to the queue before a new pair of children is added. circular_deque_t*> stack(4); @@ -1458,6 +1462,23 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, rel_gap > settings_.relative_mip_gap_tol && abs_gap > settings_.absolute_mip_gap_tol) { if (worker->worker_id == 0) { repair_heuristic_solutions(); } + // Launch a new diving task if any worker is idle + if (worker->total_active_diving_workers < worker->total_max_diving_workers && + worker->node_queue.diving_queue_size() > 0) { + launch_diving_worker(worker); + } + + // If any best-first worker become idle, + if (bfs_worker_pool_.num_idle_workers() > 0 && worker->node_queue.best_first_queue_size() > 0) { + worker->node_queue.lock(); + mip_node_t* node = worker->node_queue.bfs_top(); + if (node != nullptr) { + if (launch_bfs_worker(node)) { worker->node_queue.pop_best_first(); } + } + worker->node_queue.unlock(); + } + + assert(stack.size() <= 2); mip_node_t* node_ptr = stack.front(); stack.pop_front(); @@ -1576,22 +1597,6 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, upper_bound = upper_bound_; rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); - - // Launch a new diving task if any worker is idle - if (worker->total_active_diving_workers < worker->total_max_diving_workers && - worker->node_queue.diving_queue_size() > 0) { - launch_diving_worker(worker); - } - - // If any best-first worker become idle, - if (bfs_worker_pool_.num_idle_workers() > 0 && worker->node_queue.best_first_queue_size() > 0) { - worker->node_queue.lock(); - mip_node_t* node = worker->node_queue.pop_best_first(); - if (node != nullptr) { - if (!launch_bfs_worker(node)) { worker->node_queue.push(node); } - } - worker->node_queue.unlock(); - } } // If the solver was forced to stop, but we still have nodes to explore @@ -1806,6 +1811,11 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* 
bfs_worker->active_diving_workers[strategy]++; bfs_worker->total_active_diving_workers++; + assert(bfs_worker->active_diving_workers[strategy].load() <= + bfs_worker->max_diving_workers[strategy]); + assert(bfs_worker->total_active_diving_workers.load() <= + bfs_worker->total_max_diving_workers); + #pragma omp task affinity(*diving_worker) dive_with(diving_worker); @@ -1826,6 +1836,8 @@ void branch_and_bound_t::single_threaded_solve() node_queue_t& node_queue = worker->node_queue; node_queue.push(search_tree_.root.get_down_child()); node_queue.push(search_tree_.root.get_up_child()); + worker->lower_bound = worker->node_queue.get_lower_bound(); + worker->is_active = true; best_first_search_with(worker); } diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 10b5c57701..3d8b5a5701 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -44,6 +44,7 @@ class heap_t { buffer.emplace_back(std::forward(args)...); std::push_heap(buffer.begin(), buffer.end(), comp); ++num_entries_; + assert(num_entries_.load() == buffer.size()); } T pop() @@ -52,6 +53,7 @@ class heap_t { T node = std::move(buffer.back()); buffer.pop_back(); --num_entries_; + assert(num_entries_.load() == buffer.size()); return node; } @@ -95,6 +97,7 @@ class node_queue_t { public: void push(mip_node_t* new_node) { + assert(new_node != nullptr); auto entry = std::make_shared(new_node); best_first_heap_.push(entry); diving_heap_.push(entry); @@ -108,6 +111,7 @@ class node_queue_t { lower_bound_ = best_first_heap_.empty() ? std::numeric_limits::infinity() : best_first_heap_.top()->lower_bound; mip_node_t* node = std::exchange(entry->node, nullptr); + assert(node != nullptr); return node; } @@ -123,6 +127,11 @@ class node_queue_t { void lock() { mutex_.lock(); } void unlock() { mutex_.unlock(); } + mip_node_t* bfs_top() + { + return best_first_heap_.empty() ? 
nullptr : best_first_heap_.top()->node; + } + i_t diving_queue_size() { return diving_heap_.size(); } i_t best_first_queue_size() { return best_first_heap_.size(); } diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index cf67a69046..8c29046302 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -1381,8 +1381,7 @@ i_t pseudo_costs_t::reliable_variable_selection( // If `reliable_threshold == 0`, then we set the uninitialized pseudocosts to the average. // Otherwise, the best ones are initialized via strong branching, while the other are ignored. // - // In the latter, we are not using the average pseudocost (which calculated in the `initialized` - // method). + // So we only need to initialize the average for the former. if (reliable_threshold == 0) { averages = compute_averages(); log.printf("PC: num initialized down %d up %d avg down %e up %e\n", diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 203ce945d2..428dc3cfa6 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -155,6 +155,10 @@ class bfs_worker_t : public branch_and_bound_worker_t { void init(mip_node_t* node) { + assert(!Base::is_active.load()); + assert(node_queue.best_first_queue_size() == 0); + assert(node != nullptr); + node_queue.push(node); Base::lower_bound = node->lower_bound; Base::is_active = true; @@ -241,6 +245,11 @@ class diving_worker_t : public branch_and_bound_worker_t { void set_inactive() { + assert(Base::is_active.load()); + assert(bfs_worker != nullptr); + assert(bfs_worker->active_diving_workers[Base::search_strategy].load() > 0); + assert(bfs_worker->total_active_diving_workers.load() > 0); + Base::is_active = false; --bfs_worker->active_diving_workers[Base::search_strategy]; --bfs_worker->total_active_diving_workers; diff --git a/cpp/src/branch_and_bound/worker_pool.hpp 
b/cpp/src/branch_and_bound/worker_pool.hpp index e8b3216b5b..93e2bde6cc 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -25,6 +25,9 @@ class worker_pool_t { const simplex_solver_settings_t& settings, const uint64_t rng_offset = 0) { + assert(!is_initialized); + assert(num_workers > 0); + workers_.resize(num_workers); num_idle_workers_ = num_workers; idle_workers_.clear_resize(num_workers); @@ -46,6 +49,8 @@ class worker_pool_t { i_t idx = idle_workers_.front(); idle_workers_.pop_front(); num_idle_workers_--; + assert(idle_workers_.size() == static_cast(num_idle_workers_.load())); + assert(idx >= 0 && static_cast(idx) < workers_.size()); return workers_[idx].get(); } } @@ -53,6 +58,12 @@ class worker_pool_t { void return_worker_to_pool(WorkerType* worker) { std::lock_guard lock(mutex_); + assert(worker != nullptr); + assert(workers_[worker->worker_id].get() == worker); + assert(!worker->is_active.load()); + assert(static_cast(num_idle_workers_.load()) == idle_workers_.size()); + assert(idle_workers_.size() <= workers_.size()); + idle_workers_.push_back(worker->worker_id); num_idle_workers_++; } @@ -70,8 +81,18 @@ class worker_pool_t { return lower_bound; } - WorkerType* operator[](i_t id) { return workers_[id].get(); } - WorkerType* operator[](i_t id) const { return workers_[id].get(); } + WorkerType* operator[](i_t id) + { + assert(id >= 0 && static_cast(id) < workers_.size()); + assert(workers_[id] != nullptr); + return workers_[id].get(); + } + WorkerType* operator[](i_t id) const + { + assert(id >= 0 && static_cast(id) < workers_.size()); + assert(workers_[id] != nullptr); + return workers_[id].get(); + } i_t num_idle_workers() const { return num_idle_workers_; } i_t num_workers() const { return workers_.size(); } diff --git a/cpp/src/utilities/circular_deque.hpp b/cpp/src/utilities/circular_deque.hpp index 3fa7756a75..6e9d8f5b05 100644 --- a/cpp/src/utilities/circular_deque.hpp +++ 
b/cpp/src/utilities/circular_deque.hpp @@ -32,6 +32,7 @@ class circular_deque_t { head_(0), tail_(0) { + assert(capacity > 0); } bool empty() const { return head_ == tail_; } @@ -42,6 +43,7 @@ class circular_deque_t { void clear_resize(size_t new_capacity) { + assert(new_capacity > 0); head_ = 0; tail_ = 0; capacity_ = new_capacity + 1; From c243d002bee29449cc33eefbd5a7b30617215116 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 22 Apr 2026 15:00:58 +0200 Subject: [PATCH 34/53] added work stealing Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 25 ++++++- cpp/src/branch_and_bound/mip_node.hpp | 4 ++ cpp/src/branch_and_bound/node_queue.hpp | 7 +- cpp/src/branch_and_bound/worker.hpp | 70 ++++++++++++------- .../dual_simplex/simplex_solver_settings.hpp | 17 ++++- cpp/src/utilities/pcgenerator.hpp | 66 ++++++++++++----- 6 files changed, 139 insertions(+), 50 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index a395012d03..a84ebbbb68 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1618,6 +1618,11 @@ bfs_worker_t* branch_and_bound_t::launch_bfs_worker( bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); if (!idle_worker) { return nullptr; } + if (toc(exploration_stats_.start_time) > settings_.time_limit || + solver_status_ != mip_status_t::UNSET) { + return nullptr; + } + assert(start_node != nullptr); idle_worker->init(start_node); @@ -1630,9 +1635,10 @@ bfs_worker_t* branch_and_bound_t::launch_bfs_worker( template void branch_and_bound_t::best_first_search_with(bfs_worker_t* worker) { - f_t lower_bound = get_lower_bound(); - f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + f_t lower_bound = get_lower_bound(); + f_t abs_gap = 
compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + f_t steal_chance = settings_.bnb_node_steal_chance >= 0 ? settings_.bnb_node_steal_chance : 0.05; worker->calculate_num_diving_workers(bfs_worker_pool_.num_workers(), diving_worker_pool_.num_workers(), @@ -1683,6 +1689,14 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t solver_status_ = mip_status_t::OPTIMAL; break; } + + if (worker->node_queue.best_first_queue_size() == 0 || + worker->rng.next_double() < steal_chance) { + for (i_t i = 0; i < settings_.bnb_max_steal_attempts; ++i) { + i_t k = worker->rng.uniform(0, bfs_worker_pool_.num_workers()); + if (worker->steal_node_from(bfs_worker_pool_[k], settings_.bnb_nodes_per_steal)) { break; } + } + } } worker->set_inactive(); @@ -1802,6 +1816,11 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* return false; } + if (toc(exploration_stats_.start_time) > settings_.time_limit || + solver_status_ != mip_status_t::UNSET) { + return false; + } + for (int i = 1; i < num_search_strategies; ++i) { auto strategy = search_strategies[i]; diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp index 61228b1a8d..70c5ae416d 100644 --- a/cpp/src/branch_and_bound/mip_node.hpp +++ b/cpp/src/branch_and_bound/mip_node.hpp @@ -295,6 +295,10 @@ class mip_node_t { std::vector vstatus; + // Indicate if we can dive from this node or not. This is set to false when + // this node was already selected for diving once. 
+ bool can_dive{true}; + // Worker-local identification for deterministic ordering: // - origin_worker_id: which worker created this node // - creation_seq: sequence number within that worker (cumulative across horizons, serial) diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 3d8b5a5701..3c9ddea8a9 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -100,7 +100,7 @@ class node_queue_t { assert(new_node != nullptr); auto entry = std::make_shared(new_node); best_first_heap_.push(entry); - diving_heap_.push(entry); + if (new_node->can_dive) diving_heap_.push(entry); lower_bound_ = best_first_heap_.top()->lower_bound; } @@ -119,7 +119,10 @@ class node_queue_t { { while (!diving_heap_.empty()) { auto entry = diving_heap_.pop(); - if (entry->node != nullptr) { return entry->node; } + if (entry->node != nullptr) { + entry->node->can_dive = false; + return entry->node; + } } return nullptr; } diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 428dc3cfa6..38058cbb9e 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -133,9 +133,9 @@ class bfs_worker_t : public branch_and_bound_worker_t { uint64_t rng_offset = 0) : Base(worker_id, original_lp, Arow, var_type, settings, rng_offset) { - Base::start_lower = original_lp.lower; - Base::start_upper = original_lp.upper; - Base::search_strategy = BEST_FIRST; + this->start_lower = original_lp.lower; + this->start_upper = original_lp.upper; + this->search_strategy = BEST_FIRST; max_diving_workers.fill(0); active_diving_workers.fill(0); @@ -146,8 +146,8 @@ class bfs_worker_t : public branch_and_bound_worker_t { { f_t lower_bound = std::numeric_limits::infinity(); - if (Base::is_active) { - lower_bound = std::min(node_queue.get_lower_bound(), Base::lower_bound.load()); + if (this->is_active) { + lower_bound = std::min(node_queue.get_lower_bound(), 
this->lower_bound.load()); } return lower_bound; @@ -155,16 +155,38 @@ class bfs_worker_t : public branch_and_bound_worker_t { void init(mip_node_t* node) { - assert(!Base::is_active.load()); + assert(!this->is_active.load()); assert(node_queue.best_first_queue_size() == 0); assert(node != nullptr); node_queue.push(node); - Base::lower_bound = node->lower_bound; - Base::is_active = true; + this->lower_bound = node->lower_bound; + this->is_active = true; } - void set_inactive() { Base::is_active = false; } + void set_inactive() { this->is_active = false; } + + bool steal_node_from(bfs_worker_t* other, i_t num_nodes) + { + assert(num_nodes > 0); + + if (!other->is_active || this == other || + other->node_queue.best_first_queue_size() < 2 * num_nodes) { + return false; + } + + other->node_queue.lock(); + this->node_queue.lock(); + while (num_nodes > 0 && other->node_queue.best_first_queue_size() > 1) { + mip_node_t* node = other->node_queue.pop_best_first(); + this->node_queue.push(node); + --num_nodes; + } + this->node_queue.unlock(); + other->node_queue.unlock(); + + return true; + } void calculate_num_diving_workers(i_t num_bfs_workers, i_t total_diving_workers, @@ -188,8 +210,8 @@ class bfs_worker_t : public branch_and_bound_worker_t { i_t workers_per_type = end - start; // Calculate the number of diving workers allocated to this (best-first) worker - start = std::floor((double)Base::worker_id * workers_per_type / num_bfs_workers); - end = std::floor((double)(Base::worker_id + 1) * workers_per_type / num_bfs_workers); + start = std::floor((double)this->worker_id * workers_per_type / num_bfs_workers); + end = std::floor((double)(this->worker_id + 1) * workers_per_type / num_bfs_workers); max_diving_workers[i] = end - start; total_max_diving_workers += max_diving_workers[i]; ++k; @@ -224,34 +246,34 @@ class diving_worker_t : public branch_and_bound_worker_t { void init(const mip_node_t* node, const lp_problem_t& original_lp) { start_node = node->detach_copy(); - 
Base::start_lower = original_lp.lower; - Base::start_upper = original_lp.upper; - Base::lower_bound = node->lower_bound; - Base::is_active = true; - std::fill(Base::bounds_changed.begin(), Base::bounds_changed.end(), false); - node->get_variable_bounds(Base::start_lower, Base::start_upper, Base::bounds_changed); + this->start_lower = original_lp.lower; + this->start_upper = original_lp.upper; + this->lower_bound = node->lower_bound; + this->is_active = true; + std::fill(this->bounds_changed.begin(), this->bounds_changed.end(), false); + node->get_variable_bounds(this->start_lower, this->start_upper, this->bounds_changed); } bool presolve_start_bounds(const simplex_solver_settings_t& settings) { - return Base::node_presolver.bounds_strengthening( - settings, Base::bounds_changed, Base::start_lower, Base::start_upper); + return this->node_presolver.bounds_strengthening( + settings, this->bounds_changed, this->start_lower, this->start_upper); } f_t get_lower_bound() { - return Base::is_active ? Base::lower_bound.load() : std::numeric_limits::infinity(); + return this->is_active ? 
this->lower_bound.load() : std::numeric_limits::infinity(); } void set_inactive() { - assert(Base::is_active.load()); + assert(this->is_active.load()); assert(bfs_worker != nullptr); - assert(bfs_worker->active_diving_workers[Base::search_strategy].load() > 0); + assert(bfs_worker->active_diving_workers[this->search_strategy].load() > 0); assert(bfs_worker->total_active_diving_workers.load() > 0); - Base::is_active = false; - --bfs_worker->active_diving_workers[Base::search_strategy]; + this->is_active = false; + --bfs_worker->active_diving_workers[this->search_strategy]; --bfs_worker->total_active_diving_workers; } diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index cfc120e477..f9baf6f3da 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -110,6 +110,9 @@ struct simplex_solver_settings_t { mip_batch_pdlp_reliability_branching(0), strong_branching_simplex_iteration_limit(-1), random_seed(0), + bnb_node_steal_chance(-1), + bnb_nodes_per_steal(10), + bnb_max_steal_attempts(3), reliability_branching(-1), inside_mip(0), sub_mip(0), @@ -198,13 +201,21 @@ struct simplex_solver_settings_t { // PDLP only // Set the maximum number of simplex iterations allowed per trial branch when applying // strong branching to the root node. - // -1 - Automatic (iteration limit = 200) - // 0, 1 - Estimate the objective change using a single pivot of dual simplex - // >1 - Set as the iteration limit in dual simplex + // -1 - automatic (iteration limit = 200) + // 0, 1 - estimate the objective change using a single pivot of dual simplex + // >1 - set as the iteration limit in dual simplex i_t strong_branching_simplex_iteration_limit; diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics + // In B&B, indicate the chance in which a worker can steal a node from another worker. 
+ // -1 - automatic (0.05) + // 0 - disable + // >0 - set the stealing chance [0, 1] + f_t bnb_node_steal_chance; + i_t bnb_nodes_per_steal; + i_t bnb_max_steal_attempts; + // Settings for the reliability branching. // - -1: automatic // - 0: disable (use pseudocost branching instead) diff --git a/cpp/src/utilities/pcgenerator.hpp b/cpp/src/utilities/pcgenerator.hpp index 29a865f02f..e83e5f36ad 100644 --- a/cpp/src/utilities/pcgenerator.hpp +++ b/cpp/src/utilities/pcgenerator.hpp @@ -21,12 +21,11 @@ class pcgenerator_t { static constexpr uint64_t default_stream = 0xda3e39cb94b95bdbULL; /** - * @brief ctor. Initializes the PCG - * @param rng_state is the generator state used for initializing the generator - * @param subsequence specifies the subsequence to be generated out of 2^64 possible subsequences - * In a parallel setting, like threads of a CUDA kernel, each thread is required to generate a - * unique set of random numbers. This can be achieved by initializing the generator with same - * rng_state for all the threads and diststreamt values for subsequence. + * @brief Initializes the PCG generator. + * @param seed Generator state seed. + * @param subsequence Selects one of 2^64 independent subsequences. Use distinct values per + * thread to guarantee non-overlapping streams in parallel contexts. + * @param offset Number of outputs to skip ahead before the first draw. */ pcgenerator_t(const uint64_t seed = default_seed, const uint64_t subsequence = default_stream, @@ -35,7 +34,12 @@ class pcgenerator_t { set_seed(seed, subsequence, offset); } - // Set the seed, subsequence and offset of the PCG + /** + * @brief Re-seeds the generator. + * @param seed Generator state seed. + * @param subsequence Selects one of 2^64 independent subsequences. + * @param offset Number of outputs to skip ahead before the first draw. 
+ */ void set_seed(uint64_t seed, const uint64_t subsequence = default_stream, uint64_t offset = 0) { state = uint64_t(0); @@ -47,8 +51,12 @@ class pcgenerator_t { skipahead(offset); } - // Based on "Random Number Generation with Arbitrary Strides" F. B. Brown - // Link https://mcnp.lanl.gov/pdf_files/anl-rn-arb-stride.pdf + /** + * @brief Advances the generator state by @p offset steps in O(log offset) time. + * + * Uses the closed-form LCG jump described in "Random Number Generation with Arbitrary Strides" + * (F. B. Brown, https://mcnp.lanl.gov/pdf_files/anl-rn-arb-stride.pdf). + */ void skipahead(uint64_t offset) { uint64_t G = 1; @@ -68,9 +76,7 @@ class pcgenerator_t { } /** - * @defgroup NextRand Generate the next random number - * @brief This code is derived from PCG basic code - * @{ + * @returns the next uniformly distributed 32-bit unsigned integer. */ uint32_t next_u32() { @@ -83,6 +89,9 @@ class pcgenerator_t { return ret; } + /** + * @returns the next uniformly distributed 64-bit unsigned integer. + */ uint64_t next_u64() { uint64_t ret; @@ -93,6 +102,10 @@ class pcgenerator_t { return ret; } + /** + * @returns the next uniformly distributed non-negative 32-bit signed integer in [0, + * INT32_MAX]. + */ int32_t next_i32() { int32_t ret; @@ -102,6 +115,10 @@ class pcgenerator_t { return ret; } + /** + * @returns the next uniformly distributed non-negative 64-bit signed integer in [0, + * INT64_MAX]. + */ int64_t next_i64() { int64_t ret; @@ -111,10 +128,19 @@ class pcgenerator_t { return ret; } - float next_float() { return static_cast((next_u32() >> 8) * 0x1.0p-24); } + /** + * @returns a uniformly distributed float in [0, 1). + */ + float next_float() { return (next_u32() >> 8) * 0x1.0p-24; } - double next_double() { return static_cast((next_u64() >> 11) * 0x1.0p-53); } + /** + * @returns a uniformly distributed double in [0, 1). 
+ */ + double next_double() { return (next_u64() >> 11) * 0x1.0p-53; } + /** + * @returns the next random value of type @p T. + */ template T next() { @@ -130,9 +156,11 @@ class pcgenerator_t { void next(float& ret) { ret = next_float(); } void next(double& ret) { ret = next_double(); } - /// Draws a sample from a uniform distribution. The samples are uniformly distributed over - /// the semi-closed interval `[low, high)`. This routine may have a **slight bias** toward - /// some numbers in the range (scaling by floating-point). + /** + * @brief Draws a sample from a uniform distribution over `[low, high)`. + * + * May have a slight bias toward some values due to floating-point scaling. + */ template T uniform(T low, T high) { @@ -141,7 +169,9 @@ class pcgenerator_t { return low + (val * range); } - // Shuffles the contents of a sequence using the Fisher–Yates algorithm. + /** + * @brief Shuffles @p seq in-place using the Fisher-Yates algorithm. + */ template void shuffle(std::vector& seq) { From 4fa6ba087293e6269fe374cff21689e5fa792c42 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 22 Apr 2026 15:22:05 +0200 Subject: [PATCH 35/53] added environment variable for tuning Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 2 +- cpp/src/dual_simplex/simplex_solver_settings.hpp | 4 ++-- cpp/src/mip_heuristics/solver.cu | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index a84ebbbb68..e770d77253 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1638,7 +1638,7 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t f_t lower_bound = get_lower_bound(); f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - f_t steal_chance = settings_.bnb_node_steal_chance >= 0 ? settings_.bnb_node_steal_chance : 0.05; + f_t steal_chance = settings_.bnb_steal_chance >= 0 ? settings_.bnb_steal_chance : 0.05; worker->calculate_num_diving_workers(bfs_worker_pool_.num_workers(), diving_worker_pool_.num_workers(), diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index f9baf6f3da..0f35a7d479 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -110,7 +110,7 @@ struct simplex_solver_settings_t { mip_batch_pdlp_reliability_branching(0), strong_branching_simplex_iteration_limit(-1), random_seed(0), - bnb_node_steal_chance(-1), + bnb_steal_chance(-1), bnb_nodes_per_steal(10), bnb_max_steal_attempts(3), reliability_branching(-1), @@ -212,7 +212,7 @@ struct simplex_solver_settings_t { // -1 - automatic (0.05) // 0 - disable // >0 - set the stealing chance [0, 1] - f_t bnb_node_steal_chance; + f_t bnb_steal_chance; i_t bnb_nodes_per_steal; i_t bnb_max_steal_attempts; diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index ce6b602fba..f4d1735e18 100644 --- a/cpp/src/mip_heuristics/solver.cu 
+++ b/cpp/src/mip_heuristics/solver.cu @@ -356,6 +356,21 @@ solution_t mip_solver_t::run_solver() ? 2 : context.settings.reduced_cost_strengthening; + char* steal_chance_str = std::getenv("CUOPT_BNB_STEAL_CHANCE"); + if (steal_chance_str != nullptr) { + branch_and_bound_settings.bnb_steal_chance = atof(steal_chance_str); + } + + char* max_steal_attempts_str = std::getenv("CUOPT_BNB_MAX_STEAL_ATTEMPTS"); + if (max_steal_attempts_str != nullptr) { + branch_and_bound_settings.bnb_max_steal_attempts = atoi(max_steal_attempts_str); + } + + char* nodes_per_steal_str = std::getenv("CUOPT_BNB_NODES_PER_STEAL"); + if (nodes_per_steal_str != nullptr) { + branch_and_bound_settings.bnb_nodes_per_steal = atoi(nodes_per_steal_str); + } + if (context.settings.num_cpu_threads < 0) { branch_and_bound_settings.num_threads = std::max(1, omp_get_max_threads() - 1); } else { From 5daa24a338836dc15c9ae6aeef2ea8fee9ba60ae Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 22 Apr 2026 16:50:08 +0200 Subject: [PATCH 36/53] fixed AB/BA hazard when stealing nodes. fixed data race when activating diving workers. fixed worker "leak". Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 15 ++++++++----- cpp/src/branch_and_bound/worker.hpp | 21 ++++++++++++------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index e770d77253..ef2310a3c6 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1451,13 +1451,14 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, circular_deque_t*> stack(4); stack.push_front(start_node); - f_t lower_bound = get_lower_bound(); - f_t upper_bound = upper_bound_; - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); worker->recompute_basis = true; worker->recompute_bounds = true; + f_t lower_bound = get_lower_bound(); + f_t upper_bound = upper_bound_; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); + while (stack.size() > 0 && (solver_status_ == mip_status_t::UNSET && is_running_) && rel_gap > settings_.relative_mip_gap_tol && abs_gap > settings_.absolute_mip_gap_tol) { if (worker->worker_id == 0) { repair_heuristic_solutions(); } @@ -1620,11 +1621,13 @@ bfs_worker_t* branch_and_bound_t::launch_bfs_worker( if (toc(exploration_stats_.start_time) > settings_.time_limit || solver_status_ != mip_status_t::UNSET) { + bfs_worker_pool_.return_worker_to_pool(idle_worker); return nullptr; } assert(start_node != nullptr); idle_worker->init(start_node); + idle_worker->set_active(); #pragma omp task affinity(*idle_worker) priority(99) best_first_search_with(idle_worker); @@ -1818,6 +1821,7 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* if (toc(exploration_stats_.start_time) > settings_.time_limit || solver_status_ != mip_status_t::UNSET) { + 
diving_worker_pool_.return_worker_to_pool(diving_worker); return false; } @@ -1827,6 +1831,7 @@ bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* if (bfs_worker->active_diving_workers[strategy] < bfs_worker->max_diving_workers[strategy]) { diving_worker->search_strategy = strategy; diving_worker->bfs_worker = bfs_worker; + diving_worker->set_active(); bfs_worker->active_diving_workers[strategy]++; bfs_worker->total_active_diving_workers++; @@ -1856,7 +1861,7 @@ void branch_and_bound_t::single_threaded_solve() node_queue.push(search_tree_.root.get_down_child()); node_queue.push(search_tree_.root.get_up_child()); worker->lower_bound = worker->node_queue.get_lower_bound(); - worker->is_active = true; + worker->set_active(); best_first_search_with(worker); } diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 38058cbb9e..1b1d9afc21 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -119,6 +119,8 @@ class branch_and_bound_worker_t { return node_presolver.bounds_strengthening( settings, bounds_changed, leaf_problem.lower, leaf_problem.upper); } + + void set_active() { is_active = true; } }; template @@ -161,7 +163,6 @@ class bfs_worker_t : public branch_and_bound_worker_t { node_queue.push(node); this->lower_bound = node->lower_bound; - this->is_active = true; } void set_inactive() { this->is_active = false; } @@ -175,15 +176,19 @@ class bfs_worker_t : public branch_and_bound_worker_t { return false; } - other->node_queue.lock(); - this->node_queue.lock(); - while (num_nodes > 0 && other->node_queue.best_first_queue_size() > 1) { - mip_node_t* node = other->node_queue.pop_best_first(); + while (num_nodes > 0) { + other->node_queue.lock(); + mip_node_t* node = other->node_queue.best_first_queue_size() > num_nodes + ? 
other->node_queue.pop_best_first() + : nullptr; + other->node_queue.unlock(); + if (node == nullptr) { break; } + + this->node_queue.lock(); this->node_queue.push(node); + this->node_queue.unlock(); --num_nodes; } - this->node_queue.unlock(); - other->node_queue.unlock(); return true; } @@ -243,13 +248,13 @@ class diving_worker_t : public branch_and_bound_worker_t { using Base = branch_and_bound_worker_t; using Base::Base; + // Set `is_active = true` when the worker is ready. void init(const mip_node_t* node, const lp_problem_t& original_lp) { start_node = node->detach_copy(); this->start_lower = original_lp.lower; this->start_upper = original_lp.upper; this->lower_bound = node->lower_bound; - this->is_active = true; std::fill(this->bounds_changed.begin(), this->bounds_changed.end(), false); node->get_variable_bounds(this->start_lower, this->start_upper, this->bounds_changed); } From 1d12f90535de90238eb32de347296676822cca8c Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 22 Apr 2026 17:29:19 +0200 Subject: [PATCH 37/53] simplified bfs worker launch to avoid nested mutexes. simplified lower bound calculation. Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 19 +++++++++++++++---- cpp/src/branch_and_bound/mip_node.hpp | 2 +- cpp/src/branch_and_bound/worker.hpp | 16 ---------------- cpp/src/branch_and_bound/worker_pool.hpp | 4 +++- cpp/src/utilities/circular_deque.hpp | 2 +- 5 files changed, 20 insertions(+), 23 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index ef2310a3c6..2a8665cb96 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1472,11 +1472,22 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, // If any best-first worker become idle, if (bfs_worker_pool_.num_idle_workers() > 0 && worker->node_queue.best_first_queue_size() > 0) { worker->node_queue.lock(); - mip_node_t* node = worker->node_queue.bfs_top(); + mip_node_t* node = worker->node_queue.pop_best_first(); + + // We need to temporarily save the lower bound in this worker so it is + // considered when calculating the global lower bound. + f_t node_lower_bound = node ? 
node->lower_bound : std::numeric_limits::infinity(); + worker->lower_bound = std::min(worker->lower_bound.load(), node_lower_bound); + + worker->node_queue.unlock(); + if (node != nullptr) { - if (launch_bfs_worker(node)) { worker->node_queue.pop_best_first(); } + if (!launch_bfs_worker(node)) { + worker->node_queue.lock(); + worker->node_queue.push(node); + worker->node_queue.unlock(); + } } - worker->node_queue.unlock(); } assert(stack.size() <= 2); @@ -1489,7 +1500,7 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, // - The current node and its siblings uses the lower bound of the parent before solving the LP // relaxation // - The lower bound of the parent is lower or equal to its children - worker->lower_bound = node_ptr->lower_bound; + worker->lower_bound = std::min(worker->node_queue.get_lower_bound(), node_ptr->lower_bound); if (node_ptr->lower_bound > upper_bound_.load()) { search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp index 70c5ae416d..8ef70ddab2 100644 --- a/cpp/src/branch_and_bound/mip_node.hpp +++ b/cpp/src/branch_and_bound/mip_node.hpp @@ -297,7 +297,7 @@ class mip_node_t { // Indicate if we can dive from this node or not. This is set to false when // this node was already selected for diving once. 
- bool can_dive{true}; + omp_atomic_t can_dive{true}; // Worker-local identification for deterministic ordering: // - origin_worker_id: which worker created this node diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 1b1d9afc21..728d73c60b 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -144,17 +144,6 @@ class bfs_worker_t : public branch_and_bound_worker_t { total_active_diving_workers = 0; } - f_t get_lower_bound() - { - f_t lower_bound = std::numeric_limits::infinity(); - - if (this->is_active) { - lower_bound = std::min(node_queue.get_lower_bound(), this->lower_bound.load()); - } - - return lower_bound; - } - void init(mip_node_t* node) { assert(!this->is_active.load()); @@ -265,11 +254,6 @@ class diving_worker_t : public branch_and_bound_worker_t { settings, this->bounds_changed, this->start_lower, this->start_upper); } - f_t get_lower_bound() - { - return this->is_active ? this->lower_bound.load() : std::numeric_limits::infinity(); - } - void set_inactive() { assert(this->is_active.load()); diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index 93e2bde6cc..5aab621247 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -74,7 +74,9 @@ class worker_pool_t { if (is_initialized) { for (i_t i = 0; i < workers_.size(); ++i) { - lower_bound = std::min(workers_[i]->get_lower_bound(), lower_bound); + if (workers_[i]->is_active.load()) { + lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); + } } } diff --git a/cpp/src/utilities/circular_deque.hpp b/cpp/src/utilities/circular_deque.hpp index 6e9d8f5b05..3f420a8550 100644 --- a/cpp/src/utilities/circular_deque.hpp +++ b/cpp/src/utilities/circular_deque.hpp @@ -23,7 +23,7 @@ namespace cuopt { template class circular_deque_t { public: - circular_deque_t() : buffer_(1), capacity_(1), head_(0), tail_(0) {} + 
circular_deque_t() = default; // Allocates storage for exactly `capacity` elements up front. explicit circular_deque_t(size_t capacity) From d936e058bf8fb25d68a303b765cebfdaf36ad03d Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Thu, 23 Apr 2026 13:50:48 +0200 Subject: [PATCH 38/53] added comments. revert changes to default constructor of circular_deque Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 8 +++++--- cpp/src/branch_and_bound/branch_and_bound.hpp | 7 ++++++- cpp/src/branch_and_bound/worker.hpp | 8 ++++++++ cpp/src/utilities/circular_deque.hpp | 2 +- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 2a8665cb96..299298ed0f 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1612,8 +1612,8 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, } // If the solver was forced to stop, but we still have nodes to explore - // in the stack, then we should add all the pending nodes back to the heap so the lower - // bound of the solver is set to the correct value. + // in the stack, then we should add all the pending nodes back to the heap so the global lower + // bound is set to the correct value. while (!stack.empty()) { auto node = stack.front(); stack.pop_front(); @@ -1627,6 +1627,7 @@ template bfs_worker_t* branch_and_bound_t::launch_bfs_worker( mip_node_t* start_node) { + // Take an idle node from the pool bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); if (!idle_worker) { return nullptr; } @@ -1704,6 +1705,7 @@ void branch_and_bound_t::best_first_search_with(bfs_worker_t break; } + // Steal a node with some probability or when it is empty. The victim is determined at random. 
if (worker->node_queue.best_first_queue_size() == 0 || worker->rng.next_double() < steal_chance) { for (i_t i = 0; i < settings_.bnb_max_steal_attempts; ++i) { @@ -2615,7 +2617,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut exploration_stats_.nodes_unexplored = 2; exploration_stats_.nodes_since_last_log = 0; exploration_stats_.last_log = tic(); - min_node_queue_size_ = 2 * settings_.num_threads; + min_node_queue_size_ = 20; if (settings_.diving_settings.coefficient_diving != 0) { calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_); diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 52cd96525d..1874c288ed 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -284,9 +284,13 @@ class branch_and_bound_t { // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); + // Launch a new bfs worker initialized from the `start_node`. bfs_worker_t* launch_bfs_worker(mip_node_t* start_node); + // Launch a new diving worker from a given bfs worker. The dive will start + // from the node at the top of the local heap. bool launch_diving_worker(bfs_worker_t* bfs_worker); + // Perform best-first search with a given bfs worker. void best_first_search_with(bfs_worker_t* worker); // We use best-first to pick the `start_node` and then perform a depth-first search @@ -297,8 +301,9 @@ class branch_and_bound_t { // Perform a deep dive in the subtree determined by the `start_node` in order // to find integer feasible solutions. void dive_with(diving_worker_t* worker); + // Run the branch-and-bound algorithm in single threaded mode. - // This disable all diving heuristics. + // This disables all diving heuristics. 
void single_threaded_solve(); // Solve the LP relaxation of a leaf node diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 728d73c60b..9eb05113e3 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -156,6 +156,7 @@ class bfs_worker_t : public branch_and_bound_worker_t { void set_inactive() { this->is_active = false; } + // Steal nodes from another worker bool steal_node_from(bfs_worker_t* other, i_t num_nodes) { assert(num_nodes > 0); @@ -182,6 +183,8 @@ class bfs_worker_t : public branch_and_bound_worker_t { return true; } + // Calculate the number of diving workers that this worker can launch. Having a fixed number + // of workers allows the solver to be more deterministic. void calculate_num_diving_workers(i_t num_bfs_workers, i_t total_diving_workers, bool has_incumbent, @@ -240,6 +243,9 @@ class diving_worker_t : public branch_and_bound_worker_t { // Set `is_active = true` when the worker is ready. void init(const mip_node_t* node, const lp_problem_t& original_lp) { + // Creates a copy of the node that is disconnected from the main tree, such that the + // diving does not modify the main tree. 
We need to store the variables bounds + // associated with this node, since we cannot retrieve it from the tree start_node = node->detach_copy(); this->start_lower = original_lp.lower; this->start_upper = original_lp.upper; @@ -248,12 +254,14 @@ class diving_worker_t : public branch_and_bound_worker_t { node->get_variable_bounds(this->start_lower, this->start_upper, this->bounds_changed); } + // Apply bound strengthening to the starting variable bounds bool presolve_start_bounds(const simplex_solver_settings_t& settings) { return this->node_presolver.bounds_strengthening( settings, this->bounds_changed, this->start_lower, this->start_upper); } + // Set this node inactive void set_inactive() { assert(this->is_active.load()); diff --git a/cpp/src/utilities/circular_deque.hpp b/cpp/src/utilities/circular_deque.hpp index 3f420a8550..6e9d8f5b05 100644 --- a/cpp/src/utilities/circular_deque.hpp +++ b/cpp/src/utilities/circular_deque.hpp @@ -23,7 +23,7 @@ namespace cuopt { template class circular_deque_t { public: - circular_deque_t() = default; + circular_deque_t() : buffer_(1), capacity_(1), head_(0), tail_(0) {} // Allocates storage for exactly `capacity` elements up front. explicit circular_deque_t(size_t capacity) From 3e4023fb03d3f56a3f8329482b64eaf8b2270c31 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Mon, 27 Apr 2026 14:42:05 +0200 Subject: [PATCH 39/53] added ability to launch a new bfs worker with more than one node. fix incorrect handling with just a single bfs worker. Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 20 +++++++++++-------- cpp/src/branch_and_bound/branch_and_bound.hpp | 2 +- cpp/src/branch_and_bound/worker.hpp | 18 +++++++++++------ cpp/src/branch_and_bound/worker_pool.hpp | 4 ++-- 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 299298ed0f..67fcc697cb 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1482,7 +1482,7 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, worker->node_queue.unlock(); if (node != nullptr) { - if (!launch_bfs_worker(node)) { + if (!launch_bfs_worker({node})) { worker->node_queue.lock(); worker->node_queue.push(node); worker->node_queue.unlock(); @@ -1625,7 +1625,7 @@ void branch_and_bound_t::plunge_with(bfs_worker_t* worker, template bfs_worker_t* branch_and_bound_t::launch_bfs_worker( - mip_node_t* start_node) + const std::vector*>& start_nodes) { // Take an idle node from the pool bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); @@ -1638,7 +1638,7 @@ bfs_worker_t* branch_and_bound_t::launch_bfs_worker( } assert(start_node != nullptr); - idle_worker->init(start_node); + idle_worker->init(start_nodes); idle_worker->set_active(); #pragma omp task affinity(*idle_worker) priority(99) @@ -2635,7 +2635,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (settings_.deterministic) { run_deterministic_coordinator(Arow_); } else if (settings_.num_threads > 1) { - const i_t num_workers = 2 * settings_.num_threads; + const i_t num_workers = settings_.num_threads; const i_t num_bfs_workers = std::max(settings_.num_threads / 2, 1); const i_t num_diving_workers = num_workers - num_bfs_workers; bfs_worker_pool_.init(num_bfs_workers, original_lp_, Arow_, var_types_, settings_); @@ -2646,8 +2646,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut { #pragma 
omp master { - launch_bfs_worker(search_tree_.root.get_up_child()); - launch_bfs_worker(search_tree_.root.get_down_child()); + if (num_bfs_workers > 1) { + launch_bfs_worker({search_tree_.root.get_up_child()}); + launch_bfs_worker({search_tree_.root.get_down_child()}); + } else { + launch_bfs_worker({search_tree_.root.get_up_child(), search_tree_.root.get_down_child()}); + } } } } else { @@ -2816,8 +2820,8 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri deterministic_horizon_step_ = 0.50; // Compute worker counts using the same formula as reliability-branching scheduler - const i_t num_workers = 2 * settings_.num_threads; - const i_t num_bfs_workers = std::max(num_workers / 4, 1); + const i_t num_workers = settings_.num_threads; + const i_t num_bfs_workers = std::max(num_workers / 2, 1); const i_t num_diving_workers = num_workers - num_bfs_workers; deterministic_mode_enabled_ = true; diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 1874c288ed..2f6d170fb0 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -285,7 +285,7 @@ class branch_and_bound_t { void repair_heuristic_solutions(); // Launch a new bfs worker initialized from the `start_node`. - bfs_worker_t* launch_bfs_worker(mip_node_t* start_node); + bfs_worker_t* launch_bfs_worker(const std::vector*>&); // Launch a new diving worker from a given bfs worker. The dive will start // from the node at the top of the local heap. 
bool launch_diving_worker(bfs_worker_t* bfs_worker); diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index 9eb05113e3..acf1982db3 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -144,14 +144,17 @@ class bfs_worker_t : public branch_and_bound_worker_t { total_active_diving_workers = 0; } - void init(mip_node_t* node) + void init(const std::vector*>& nodes) { assert(!this->is_active.load()); assert(node_queue.best_first_queue_size() == 0); assert(node != nullptr); - node_queue.push(node); - this->lower_bound = node->lower_bound; + for (auto* node : nodes) { + node_queue.push(node); + } + + this->lower_bound = node_queue.get_lower_bound(); } void set_inactive() { this->is_active = false; } @@ -159,11 +162,12 @@ class bfs_worker_t : public branch_and_bound_worker_t { // Steal nodes from another worker bool steal_node_from(bfs_worker_t* other, i_t num_nodes) { + bool steal = false; assert(num_nodes > 0); if (!other->is_active || this == other || other->node_queue.best_first_queue_size() < 2 * num_nodes) { - return false; + return steal; } while (num_nodes > 0) { @@ -178,9 +182,10 @@ class bfs_worker_t : public branch_and_bound_worker_t { this->node_queue.push(node); this->node_queue.unlock(); --num_nodes; + steal = true; } - return true; + return steal; } // Calculate the number of diving workers that this worker can launch. Having a fixed number @@ -240,7 +245,8 @@ class diving_worker_t : public branch_and_bound_worker_t { using Base = branch_and_bound_worker_t; using Base::Base; - // Set `is_active = true` when the worker is ready. + // After calling this routine, you need to set `is_active = true` when the worker is ready. + // Note that the starting node may be dropped if become infeasible via bound propagation. 
void init(const mip_node_t* node, const lp_problem_t& original_lp) { // Creates a copy of the node that is disconnected from the main tree, such that the diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp index 5aab621247..9396f48c04 100644 --- a/cpp/src/branch_and_bound/worker_pool.hpp +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -15,8 +15,8 @@ namespace cuopt::linear_programming::dual_simplex { template class worker_pool_t { public: - using i_t = WorkerType::int_type; - using f_t = WorkerType::float_type; + using i_t = typename WorkerType::int_type; + using f_t = typename WorkerType::float_type; void init(i_t num_workers, const lp_problem_t& original_lp, From f452639b5731b0a5710b9447d7f9c1db1cf8de6a Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Mon, 27 Apr 2026 14:58:41 +0200 Subject: [PATCH 40/53] fixed assert Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 1 - cpp/src/branch_and_bound/worker.hpp | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 67fcc697cb..6fbfa5c664 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1637,7 +1637,6 @@ bfs_worker_t* branch_and_bound_t::launch_bfs_worker( return nullptr; } - assert(start_node != nullptr); idle_worker->init(start_nodes); idle_worker->set_active(); diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index acf1982db3..bc338a69a3 100644 --- a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -148,9 +148,10 @@ class bfs_worker_t : public branch_and_bound_worker_t { { assert(!this->is_active.load()); assert(node_queue.best_first_queue_size() == 0); - assert(node != nullptr); + assert(nodes.size() > 0); for (auto* node : nodes) { + assert(node != nullptr); 
node_queue.push(node); } From 284d5a3308dad4801ef2f322da2ed5bbf3a4f4ca Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 28 Apr 2026 10:16:36 +0200 Subject: [PATCH 41/53] correctly clean the vstatus buffer in the mip_node after branching Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/mip_node.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp index 8ef70ddab2..8ad86c315d 100644 --- a/cpp/src/branch_and_bound/mip_node.hpp +++ b/cpp/src/branch_and_bound/mip_node.hpp @@ -166,7 +166,7 @@ class mip_node_t { children[0] = std::move(down_child); children[1] = std::move(up_child); // When we add children we no longer need to store our basis - vstatus.clear(); + vstatus = {}; } bool is_inactive() const From 93e834739afcfcb54aaa2c632d865d18fd91ab21 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 28 Apr 2026 10:30:39 +0200 Subject: [PATCH 42/53] cleaning code Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 36 ++++++++-------- cpp/src/branch_and_bound/branch_and_bound.hpp | 2 - cpp/src/branch_and_bound/node_queue.hpp | 12 ------ cpp/src/branch_and_bound/pseudo_costs.hpp | 42 +++++++++---------- cpp/src/utilities/omp_helpers.hpp | 20 +-------- 5 files changed, 41 insertions(+), 71 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 6fbfa5c664..ddb75c6d18 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -2753,8 +2753,8 @@ Work Units: 0 0.5 1. 
──────────────────────────────────────────────────────────────────────────────────────────► Work Unit Time -Legend: ▓▓▓ = actively working ░░░ = waiting at barrier [hash] = state hash for -verification wut = work unit timestamp PC = pseudo-costs snap = snapshot (local copy) +Legend: ▓▓▓ = actively working ░░░ = waiting at barrier [hash] = state hash for verification + wut = work unit timestamp PC = pseudo-costs snap = snapshot (local copy) */ @@ -2792,22 +2792,23 @@ Producer Sync: Producing solutions in the past would break determinism, therefore this unidirectional sync ensures no such thing can occur. Instrumentation Aggregator: Collects multiple instrument vectors into a single aggregation point for estimating work from memory operations. Worker Context: Object -representing the "context" (e.g.: the worker) that should register the amount of work recorded -There is a 1context:1worker mapping. The Work Unit Scheduler registers such contexts and ensure -they remained synchronized together. Queued Integer Solutions: New integer solutions found within -horizons are queued with a work unit timestamp, in order to be sorted and played in order during -the sync callback. Creation Sequence: In nondeterministic mode, a single global atomic integer is -used to generate sequential IDs for the nodes. Since this is a global atomic, it is inherently +representing the "context" (e.g.: the worker) that should register the amount of work recorded There +is a 1context:1worker mapping. The Work Unit Scheduler registers such contexts and ensure they +remained synchronized together. Queued Integer Solutions: New integer solutions found within +horizons are queued with a work unit timestamp, in order to be sorted and played in order during the +sync callback. Creation Sequence: In nondeterministic mode, a single global atomic integer is used +to generate sequential IDs for the nodes. Since this is a global atomic, it is inherently nondeterministic. 
To fix this, in deterministic mode, nodes are addressed by a tuple - where "worker_id" is the ID of the worker that created this node, and "seq_id" is a sequential -ID local to the worker.\ This sequential ID is similar in principle to the global atomic ID -sequence of the nondeterminsitic mode but since it is local to each worker, it is updated serially -and thus is deterministic. worker IDs are unique, and sequence IDs are unique to their workers, -therefor is a globally unique node identifier. Pseudocost Update: Each worker -updates its local pseudocosts when branching. These updates are queued within horizons. During the -horizon sync, these updates are all played in order, and the newly updated global pseudocosts are -broadcast to the worker's pseudocost snapshots for the coming horizon. + where "worker_id" is the ID of the worker that created this node, and "seq_id" is a sequential ID +local to the worker.\ This sequential ID is similar in principle to the global atomic ID sequence of +the nondeterminsitic mode but since it is local to each worker, it is updated serially and thus is +deterministic. worker IDs are unique, and sequence IDs are unique to their workers, therefor + is a globally unique node identifier. +Pseudocost Update: + Each worker updates its local pseudocosts when branching. These updates are queued within +horizons. During the horizon sync, these updates are all played in order, and the newly updated +global pseudocosts are broadcast to the worker's pseudocost snapshots for the coming horizon. 
*/ @@ -2917,8 +2918,7 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri "Sync%% | NoWork\n"); settings_.log.printf( " " - "-------+---------+----------+--------+---------+--------+----------+----------+-------+-----" - "--" + "-------+---------+----------+--------+---------+--------+----------+----------+-------+-------" "\n"); for (const auto& worker : *deterministic_workers_) { double sync_time = worker.work_context.total_sync_time; diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 2f6d170fb0..58a5780de9 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -438,8 +438,6 @@ class branch_and_bound_t { } }; heap_t diving_heap_; - - friend class branch_and_bound_worker_t; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 3c9ddea8a9..15c50c99b7 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -71,18 +71,6 @@ class heap_t { // Read-only access to underlying buffer for iteration without modification const std::vector& data() const { return buffer; } - // Remove entries matching `should_remove` and rebuild the heap. - // Caller must hold any external lock before calling this. 
- template - void compact(Pred&& should_remove) - { - auto it = std::remove_if(buffer.begin(), buffer.end(), std::forward(should_remove)); - size_t removed = std::distance(it, buffer.end()); - buffer.erase(it, buffer.end()); - num_entries_ = buffer.size(); - if (removed > 0) { std::make_heap(buffer.begin(), buffer.end(), comp); } - } - private: std::vector buffer; omp_atomic_t num_entries_{0}; diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 30f6e5d7e9..c6e38a69db 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -245,20 +245,20 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { pseudo_cost_snapshot_t& operator=( const pseudo_costs_t& other) { - Base::AT = other.AT; - Base::pdlp_warm_cache = other.pdlp_warm_cache; + this->AT = other.AT; + this->pdlp_warm_cache = other.pdlp_warm_cache; i_t n = other.pseudo_cost_num_down.size(); - Base::pseudo_cost_num_down.resize(n); - Base::pseudo_cost_num_up.resize(n); - Base::pseudo_cost_sum_down.resize(n); - Base::pseudo_cost_sum_up.resize(n); + this->pseudo_cost_num_down.resize(n); + this->pseudo_cost_num_up.resize(n); + this->pseudo_cost_sum_down.resize(n); + this->pseudo_cost_sum_up.resize(n); for (i_t i = 0; i < n; ++i) { - Base::pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].get_no_atomic(); - Base::pseudo_cost_num_up[i] = other.pseudo_cost_num_up[i].get_no_atomic(); - Base::pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].get_no_atomic(); - Base::pseudo_cost_sum_up[i] = other.pseudo_cost_sum_up[i].get_no_atomic(); + this->pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].underlying(); + this->pseudo_cost_num_up[i] = other.pseudo_cost_num_up[i].underlying(); + this->pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].underlying(); + this->pseudo_cost_sum_up[i] = other.pseudo_cost_sum_up[i].underlying(); } return *this; @@ -267,12 +267,12 @@ class pseudo_cost_snapshot_t : public 
pseudo_costs_t { pseudo_cost_snapshot_t& operator=(const Base& other) { if (this != &other) { - Base::AT = other.AT; - Base::pdlp_warm_cache = other.pdlp_warm_cache; - Base::pseudo_cost_num_down = other.pseudo_cost_num_down; - Base::pseudo_cost_num_up = other.pseudo_cost_num_up; - Base::pseudo_cost_sum_down = other.pseudo_cost_sum_down; - Base::pseudo_cost_sum_up = other.pseudo_cost_sum_up; + this->AT = other.AT; + this->pdlp_warm_cache = other.pdlp_warm_cache; + this->pseudo_cost_num_down = other.pseudo_cost_num_down; + this->pseudo_cost_num_up = other.pseudo_cost_num_up; + this->pseudo_cost_sum_down = other.pseudo_cost_sum_down; + this->pseudo_cost_sum_up = other.pseudo_cost_sum_up; } return *this; }; @@ -282,11 +282,11 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { { updates_.push_back({variable, direction, delta, clock, worker_id}); if (direction == rounding_direction_t::DOWN) { - Base::pseudo_cost_sum_down[variable] += delta; - ++Base::pseudo_cost_num_down[variable]; + this->pseudo_cost_sum_down[variable] += delta; + ++this->pseudo_cost_num_down[variable]; } else { - Base::pseudo_cost_sum_up[variable] += delta; - ++Base::pseudo_cost_num_up[variable]; + this->pseudo_cost_sum_up[variable] += delta; + ++this->pseudo_cost_num_up[variable]; } } @@ -297,7 +297,7 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { return result; } - i_t n_vars() const { return Base::pseudo_cost_sum_down.size(); } + i_t n_vars() const { return this->pseudo_cost_sum_down.size(); } private: std::vector> updates_; diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index 8ac81b0af4..f7897d3900 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -125,16 +125,15 @@ class omp_atomic_t { T fetch_sub(T inc) { return fetch_add(-inc); } // Get the underlying value without atomics - T& get_no_atomic() { return val; } + T& underlying() { return val; } - T get_no_atomic() const { return val; } + T underlying() 
const { return val; } private: T val; friend double fetch_min(omp_atomic_t& atomic_var, double other); friend double fetch_max(omp_atomic_t& atomic_var, double other); - friend bool compare_exchange(omp_atomic_t& atomic_var, int& expected, int desired); }; // Free non-template functions are necessary because of a clang 20 bug @@ -162,21 +161,6 @@ inline double fetch_max(omp_atomic_t& atomic_var, double other) return old; } -// CAS: atomically sets `atomic_var` to `desired` if it equals `expected`. -// On failure, loads the current value into `expected`. -// Returns true if the exchange happened. -inline bool compare_exchange(omp_atomic_t& atomic_var, int& expected, int desired) -{ - int old; -#pragma omp atomic compare capture seq_cst - { - old = atomic_var.val; - if (atomic_var.val == expected) { atomic_var.val = desired; } - } - bool success = (old == expected); - if (!success) { expected = old; } - return success; -} #endif } // namespace cuopt From 7df7aa1857f117440ec4a4c8e1589f23e9c2459a Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 28 Apr 2026 14:49:33 +0200 Subject: [PATCH 43/53] each mip node now stores vstatus in a compressed format to save memory space Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 36 ++++---- cpp/src/branch_and_bound/mip_node.hpp | 12 +-- cpp/src/branch_and_bound/pseudo_costs.cpp | 6 +- cpp/src/branch_and_bound/worker.hpp | 2 + cpp/src/dual_simplex/initial_basis.cpp | 84 +++++++++++++++++++ cpp/src/dual_simplex/initial_basis.hpp | 6 +- 6 files changed, 118 insertions(+), 28 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index ddb75c6d18..d0ff16ee23 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1248,7 +1248,7 @@ std::pair branch_and_bound_t::upd policy.select_branch_variable(node_ptr, leaf_fractional, leaf_solution.x); round_dir = dir; - assert(node_ptr->vstatus.size() == leaf_problem.num_cols); + assert(worker->leaf_vstatus.size() == leaf_problem.num_cols); assert(branch_var >= 0); assert(dir != rounding_direction_t::NONE); @@ -1262,7 +1262,7 @@ std::pair branch_and_bound_t::upd branch_var, leaf_solution.x[branch_var], num_frac, - node_ptr->vstatus, + worker->leaf_vstatus, leaf_problem, log); search_tree.update(node_ptr, node_status_t::HAS_CHILDREN); @@ -1345,8 +1345,9 @@ dual::status_t branch_and_bound_t::solve_node_lp( } #endif - std::vector& leaf_vstatus = node_ptr->vstatus; - assert(leaf_vstatus.size() == worker->leaf_problem.num_cols); + worker->leaf_vstatus = + decompress_vstatus(node_ptr->packed_vstatus, worker->leaf_problem.num_cols); + assert(worker->leaf_vstatus.size() == worker->leaf_problem.num_cols); simplex_solver_settings_t lp_settings = settings_; lp_settings.concurrent_halt = &node_concurrent_halt_; @@ -1405,7 +1406,7 @@ dual::status_t branch_and_bound_t::solve_node_lp( lp_start_time, worker->leaf_problem, lp_settings, - leaf_vstatus, + worker->leaf_vstatus, worker->basis_factors, worker->basic_list, worker->nonbasic_list, @@ -1422,7 +1423,7 @@ dual::status_t branch_and_bound_t::solve_node_lp( worker->basis_factors, 
worker->basic_list, worker->nonbasic_list, - leaf_vstatus, + worker->leaf_vstatus, worker->leaf_edge_norms); lp_status = convert_lp_status_to_dual_status(second_status); @@ -3221,10 +3222,10 @@ node_status_t branch_and_bound_t::solve_node_deterministic( // Solve LP relaxation worker.leaf_solution.resize(worker.leaf_problem.num_rows, worker.leaf_problem.num_cols); - std::vector& leaf_vstatus = node_ptr->vstatus; - i_t node_iter = 0; - f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; + worker.leaf_vstatus = decompress_vstatus(node_ptr->packed_vstatus, worker.leaf_problem.num_cols); + i_t node_iter = 0; + f_t lp_start_time = tic(); + std::vector leaf_edge_norms = edge_norms_; dual::status_t lp_status = dual_phase2_with_advanced_basis(2, 0, @@ -3232,7 +3233,7 @@ node_status_t branch_and_bound_t::solve_node_deterministic( lp_start_time, worker.leaf_problem, lp_settings, - leaf_vstatus, + worker.leaf_vstatus, worker.basis_factors, worker.basic_list, worker.nonbasic_list, @@ -3250,7 +3251,7 @@ node_status_t branch_and_bound_t::solve_node_deterministic( worker.basis_factors, worker.basic_list, worker.nonbasic_list, - leaf_vstatus, + worker.leaf_vstatus, leaf_edge_norms, &worker.work_context); lp_status = convert_lp_status_to_dual_status(second_status); @@ -3835,10 +3836,9 @@ void branch_and_bound_t::deterministic_dive( // Solve LP relaxation worker.leaf_solution.resize(worker.leaf_problem.num_rows, worker.leaf_problem.num_cols); - std::vector& leaf_vstatus = node_ptr->vstatus; - i_t node_iter = 0; - f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; + i_t node_iter = 0; + f_t lp_start_time = tic(); + std::vector leaf_edge_norms = edge_norms_; dual::status_t lp_status = dual_phase2_with_advanced_basis(2, 0, @@ -3846,7 +3846,7 @@ void branch_and_bound_t::deterministic_dive( lp_start_time, worker.leaf_problem, lp_settings, - leaf_vstatus, + worker.leaf_vstatus, worker.basis_factors, worker.basic_list, worker.nonbasic_list, @@ 
-3863,7 +3863,7 @@ void branch_and_bound_t::deterministic_dive( worker.basis_factors, worker.basic_list, worker.nonbasic_list, - leaf_vstatus, + worker.leaf_vstatus, leaf_edge_norms, &worker.work_context); lp_status = convert_lp_status_to_dual_status(second_status); diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp index 8ad86c315d..7dae385430 100644 --- a/cpp/src/branch_and_bound/mip_node.hpp +++ b/cpp/src/branch_and_bound/mip_node.hpp @@ -55,7 +55,7 @@ class mip_node_t { branch_var_upper(std::numeric_limits::infinity()), fractional_val(std::numeric_limits::infinity()), objective_estimate(std::numeric_limits::infinity()), - vstatus(0) + packed_vstatus(0) { children[0] = nullptr; children[1] = nullptr; @@ -71,7 +71,7 @@ class mip_node_t { branch_dir(rounding_direction_t::NONE), integer_infeasible(-1), objective_estimate(std::numeric_limits::infinity()), - vstatus(basis) + packed_vstatus(compress_vstatus(basis)) { children[0] = nullptr; children[1] = nullptr; @@ -95,7 +95,7 @@ class mip_node_t { fractional_val(branch_var_value), integer_infeasible(integer_inf), objective_estimate(parent_node->objective_estimate), - vstatus(basis) + packed_vstatus(compress_vstatus(basis)) { branch_var_lower = branch_direction == rounding_direction_t::DOWN ? 
problem.lower[branch_var] : std::ceil(branch_var_value); @@ -166,7 +166,7 @@ class mip_node_t { children[0] = std::move(down_child); children[1] = std::move(up_child); // When we add children we no longer need to store our basis - vstatus = {}; + packed_vstatus = {}; } bool is_inactive() const @@ -262,7 +262,7 @@ class mip_node_t { copy.depth = depth; copy.node_id = node_id; copy.integer_infeasible = integer_infeasible; - copy.vstatus = vstatus; + copy.packed_vstatus = packed_vstatus; copy.branch_var = branch_var; copy.branch_dir = branch_dir; copy.branch_var_lower = branch_var_lower; @@ -293,7 +293,7 @@ class mip_node_t { mip_node_t* parent; std::unique_ptr children[2]; - std::vector vstatus; + std::vector packed_vstatus; // Indicate if we can dive from this node or not. This is set to false when // this node was already selected for diving once. diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index 8c29046302..487668d25d 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -1501,7 +1501,7 @@ i_t pseudo_costs_t::reliable_variable_selection( single_pivot_objective_change_estimate(worker->leaf_problem, settings, *AT, - node_ptr->vstatus, + worker->leaf_vstatus, j, basic_map[j], leaf_solution, @@ -1612,7 +1612,7 @@ i_t pseudo_costs_t::reliable_variable_selection( const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, - node_ptr->vstatus, + worker->leaf_vstatus, worker->leaf_edge_norms, worker->basis_factors, worker->basic_list, @@ -1657,7 +1657,7 @@ i_t pseudo_costs_t::reliable_variable_selection( const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, - node_ptr->vstatus, + worker->leaf_vstatus, worker->leaf_edge_norms, worker->basis_factors, worker->basic_list, diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp index bc338a69a3..acb8ccca87 100644 --- 
a/cpp/src/branch_and_bound/worker.hpp +++ b/cpp/src/branch_and_bound/worker.hpp @@ -59,6 +59,7 @@ class branch_and_bound_worker_t { lp_problem_t leaf_problem; lp_solution_t leaf_solution; + std::vector leaf_vstatus; std::vector leaf_edge_norms; basis_update_mpf_t basis_factors; @@ -88,6 +89,7 @@ class branch_and_bound_worker_t { lower_bound(-std::numeric_limits::infinity()), leaf_problem(original_lp), leaf_solution(original_lp.num_rows, original_lp.num_cols), + leaf_vstatus(original_lp.num_cols), basis_factors(original_lp.num_rows, settings.refactor_frequency), basic_list(original_lp.num_rows), nonbasic_list(), diff --git a/cpp/src/dual_simplex/initial_basis.cpp b/cpp/src/dual_simplex/initial_basis.cpp index d69bf8877e..65c5644ae8 100644 --- a/cpp/src/dual_simplex/initial_basis.cpp +++ b/cpp/src/dual_simplex/initial_basis.cpp @@ -18,6 +18,90 @@ namespace cuopt::linear_programming::dual_simplex { +uint8_t encode(variable_status_t vstatus) +{ + assert(vstatus != variable_status_t::SUPERBASIC && + "packed_variable_status_t does not support superbasic variables"); + + uint8_t val = 0; + switch (vstatus) { + case variable_status_t::BASIC: val = 0b00; break; + case variable_status_t::NONBASIC_LOWER: val = 0b01; break; + case variable_status_t::NONBASIC_UPPER: val = 0b10; break; + case variable_status_t::NONBASIC_FIXED: val = 0b11; break; + default: val = 0b11; + } + + return val; +} + +variable_status_t decode(uint8_t val) +{ + val &= 0b11; + if (val == 0b00) return variable_status_t::BASIC; + if (val == 0b01) return variable_status_t::NONBASIC_LOWER; + if (val == 0b10) return variable_status_t::NONBASIC_UPPER; + return variable_status_t::NONBASIC_FREE; +} + +std::vector compress_vstatus(const std::vector& vstatus) +{ + size_t n = vstatus.size() / 4; + size_t has_tail = (vstatus.size() % 4 > 0); + + std::vector packed_vstatus; + packed_vstatus.resize(n + has_tail); + + for (size_t i = 0; i < n; ++i) { + size_t j = i * 4; + packed_vstatus[i] = 0; + packed_vstatus[i] |= 
encode(vstatus[j]); + packed_vstatus[i] |= encode(vstatus[j + 1]) << 2; + packed_vstatus[i] |= encode(vstatus[j + 2]) << 4; + packed_vstatus[i] |= encode(vstatus[j + 3]) << 6; + } + + if (has_tail) { + size_t j = n * 4; + packed_vstatus[n] = 0; + packed_vstatus[n] |= encode(vstatus[j]); + if (j + 1 < vstatus.size()) packed_vstatus[n] |= encode(vstatus[j + 1]) << 2; + if (j + 2 < vstatus.size()) packed_vstatus[n] |= encode(vstatus[j + 2]) << 4; + if (j + 3 < vstatus.size()) packed_vstatus[n] |= encode(vstatus[j + 3]) << 6; + } + + return packed_vstatus; +} + +std::vector decompress_vstatus(const std::vector& packed_vstatus, + size_t vstatus_size) +{ + size_t n = vstatus_size / 4; + size_t has_tail = (vstatus_size % 4 > 0); + + std::vector vstatus; + vstatus.resize(vstatus_size); + assert(vstatus_size == n + has_tail); + + for (size_t i = 0; i < n; ++i) { + size_t j = i * 4; + vstatus[j] = decode(packed_vstatus[i]); + vstatus[j + 1] = decode(packed_vstatus[i] >> 2); + vstatus[j + 2] = decode(packed_vstatus[i] >> 4); + vstatus[j + 3] = decode(packed_vstatus[i] >> 6); + } + + if (has_tail) { + size_t j = n * 4; + vstatus[j] = decode(packed_vstatus[n]); + if (j + 1 < vstatus.size()) vstatus[j + 1] = decode(packed_vstatus[n] >> 2); + if (j + 2 < vstatus.size()) vstatus[j + 2] = decode(packed_vstatus[n] >> 4); + if (j + 3 < vstatus.size()) vstatus[j + 3] = decode(packed_vstatus[n] >> 6); + } + + return vstatus; +} + template i_t initial_basis_selection(const lp_problem_t& problem, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/initial_basis.hpp b/cpp/src/dual_simplex/initial_basis.hpp index 646785fbd2..22a172dbe0 100644 --- a/cpp/src/dual_simplex/initial_basis.hpp +++ b/cpp/src/dual_simplex/initial_basis.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -23,6 +23,10 @@ enum class variable_status_t : int8_t { SUPERBASIC = 4 }; +std::vector compress_vstatus(const std::vector& vstatus); +std::vector decompress_vstatus(const std::vector& packed_vstatus, + size_t vstatus_size); + template i_t initial_basis_selection(const lp_problem_t& problem, const simplex_solver_settings_t& settings, From 2cd842a372e7ad866b3158e96c68b74b4f258973 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 28 Apr 2026 14:51:00 +0200 Subject: [PATCH 44/53] fixed assert Signed-off-by: Nicolas L. Guidotti --- cpp/src/dual_simplex/initial_basis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/initial_basis.cpp b/cpp/src/dual_simplex/initial_basis.cpp index 65c5644ae8..fee6160b12 100644 --- a/cpp/src/dual_simplex/initial_basis.cpp +++ b/cpp/src/dual_simplex/initial_basis.cpp @@ -81,7 +81,7 @@ std::vector decompress_vstatus(const std::vector& pa std::vector vstatus; vstatus.resize(vstatus_size); - assert(vstatus_size == n + has_tail); + assert(packed_vstatus.size() == n + has_tail); for (size_t i = 0; i < n; ++i) { size_t j = i * 4; From 226968d9ff355c8d1a04c5d9d06e42b488dddffd Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 28 Apr 2026 14:55:04 +0200 Subject: [PATCH 45/53] fixed NONBASIC_FIXED encoding Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/dual_simplex/initial_basis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/initial_basis.cpp b/cpp/src/dual_simplex/initial_basis.cpp index fee6160b12..dcb1615359 100644 --- a/cpp/src/dual_simplex/initial_basis.cpp +++ b/cpp/src/dual_simplex/initial_basis.cpp @@ -28,7 +28,7 @@ uint8_t encode(variable_status_t vstatus) case variable_status_t::BASIC: val = 0b00; break; case variable_status_t::NONBASIC_LOWER: val = 0b01; break; case variable_status_t::NONBASIC_UPPER: val = 0b10; break; - case variable_status_t::NONBASIC_FIXED: val = 0b11; break; + case variable_status_t::NONBASIC_FIXED: val = 0b01; break; default: val = 0b11; } From 54d3f6685abe76656e24cd0543ff2d1bba2bba0b Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 28 Apr 2026 16:24:52 +0200 Subject: [PATCH 46/53] fix missing decompression for deterministic diving Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index d0ff16ee23..577cbafb16 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -3840,6 +3840,8 @@ void branch_and_bound_t::deterministic_dive( f_t lp_start_time = tic(); std::vector leaf_edge_norms = edge_norms_; + worker.leaf_vstatus = + decompress_vstatus(node_ptr->packed_vstatus, worker.leaf_problem.num_cols); dual::status_t lp_status = dual_phase2_with_advanced_basis(2, 0, worker.recompute_bounds_and_basis, From 14abf1732bb294990bfab3ebae4b5c67b785c5be Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Wed, 29 Apr 2026 13:30:35 +0200 Subject: [PATCH 47/53] addresses reviewer's comments Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 60 +++---- cpp/src/branch_and_bound/branch_and_bound.hpp | 6 +- cpp/src/branch_and_bound/constants.hpp | 2 +- .../deterministic_workers.hpp | 8 +- .../branch_and_bound/diving_heuristics.cpp | 115 ++++++------- cpp/src/branch_and_bound/mip_node.hpp | 30 ++-- cpp/src/branch_and_bound/pseudo_costs.cpp | 157 +++++++++--------- cpp/src/branch_and_bound/pseudo_costs.hpp | 88 +++++----- 8 files changed, 227 insertions(+), 239 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index dc8513a269..13313a46e2 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -256,7 +256,7 @@ branch_and_bound_t::branch_and_bound_t( incumbent_(1), root_relax_soln_(1, 1), root_crossover_soln_(1, 1), - pc_(1), + pc_(1, solver_settings), solver_status_(mip_status_t::UNSET) { exploration_stats_.start_time = start_time; @@ -808,7 +808,7 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, // Technische Universit¨at Berlin, Berlin, 1999. Accessed: Aug. 08, 2025. // [Online]. 
Available: https://opus4.kobv.de/opus4-zib/frontdoor/index/index/docId/391 template -rounding_direction_t martin_criteria(f_t val, f_t root_val) +branch_direction_t martin_criteria(f_t val, f_t root_val) { const f_t down_val = std::floor(root_val); const f_t up_val = std::ceil(root_val); @@ -817,10 +817,10 @@ rounding_direction_t martin_criteria(f_t val, f_t root_val) constexpr f_t eps = 1e-6; if (down_dist < up_dist + eps) { - return rounding_direction_t::DOWN; + return branch_direction_t::DOWN; } else { - return rounding_direction_t::UP; + return branch_direction_t::UP; } } @@ -831,9 +831,9 @@ branch_variable_t branch_and_bound_t::variable_selection( branch_and_bound_worker_t* worker) { logger_t log; - log.log = false; - i_t branch_var = -1; - rounding_direction_t round_dir = rounding_direction_t::NONE; + log.log = false; + i_t branch_var = -1; + branch_direction_t round_dir = branch_direction_t::NONE; std::vector current_incumbent; std::vector& solution = worker->leaf_solution.x; @@ -846,14 +846,12 @@ branch_variable_t branch_and_bound_t::variable_selection( worker, var_types_, exploration_stats_, - settings_, upper_bound_, worker_pool_.num_idle_workers(), - log, new_slacks_, original_lp_); } else { - branch_var = pc_.variable_selection(fractional, solution, log); + branch_var = pc_.variable_selection(fractional, solution); } round_dir = martin_criteria(solution[branch_var], root_relax_soln_.x[branch_var]); @@ -878,7 +876,7 @@ branch_variable_t branch_and_bound_t::variable_selection( default: log.debug("Unknown variable selection method: %d\n", worker->search_strategy); - return {-1, rounding_direction_t::NONE}; + return {-1, branch_direction_t::NONE}; } } @@ -905,7 +903,7 @@ struct tree_update_policy_t { const std::vector& x) = 0; virtual void on_node_completed(mip_node_t* node, node_status_t status, - rounding_direction_t dir) = 0; + branch_direction_t dir) = 0; virtual void on_numerical_issue(mip_node_t*) = 0; virtual void graphviz(search_tree_t&, 
mip_node_t*, const char*, f_t) = 0; virtual void on_optimal_callback(const std::vector&, f_t) = 0; @@ -950,9 +948,7 @@ struct nondeterministic_policy_t : tree_update_policy_t { const std::vector& x) override { if (worker->search_strategy == search_strategy_t::BEST_FIRST) { - logger_t pc_log; - pc_log.log = false; - node->objective_estimate = bnb.pc_.obj_estimate(fractional, x, node->lower_bound, pc_log); + node->objective_estimate = bnb.pc_.obj_estimate(fractional, x, node->lower_bound); } } @@ -984,7 +980,7 @@ struct nondeterministic_policy_t : tree_update_policy_t { } } - void on_node_completed(mip_node_t*, node_status_t, rounding_direction_t) override {} + void on_node_completed(mip_node_t*, node_status_t, branch_direction_t) override {} }; template @@ -1003,7 +999,7 @@ struct deterministic_policy_base_t : tree_update_policy_t { { if (node->branch_var < 0) return; f_t change = std::max(leaf_obj - node->lower_bound, f_t(0)); - f_t frac = node->branch_dir == rounding_direction_t::DOWN + f_t frac = node->branch_dir == branch_direction_t::DOWN ? 
node->fractional_val - std::floor(node->fractional_val) : std::ceil(node->fractional_val) - node->fractional_val; if (frac > 1e-10) { @@ -1038,9 +1034,7 @@ struct deterministic_bfs_policy_t const std::vector& fractional, const std::vector& x) override { - logger_t log; - log.log = false; - i_t var = this->worker.pc_snapshot.variable_selection(fractional, x, log); + i_t var = this->worker.pc_snapshot.variable_selection(fractional, x); auto dir = martin_criteria(x[var], this->bnb.root_relax_soln_.x[var]); return {var, dir}; } @@ -1052,12 +1046,12 @@ struct deterministic_bfs_policy_t logger_t log; log.log = false; node->objective_estimate = - this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound, log); + this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound); } void on_node_completed(mip_node_t* node, node_status_t status, - rounding_direction_t dir) override + branch_direction_t dir) override { switch (status) { case node_status_t::INFEASIBLE: this->worker.record_infeasible(node); break; @@ -1146,7 +1140,7 @@ struct deterministic_diving_policy_t log); } - default: CUOPT_LOG_ERROR("Invalid diving method!"); return {-1, rounding_direction_t::NONE}; + default: CUOPT_LOG_ERROR("Invalid diving method!"); return {-1, branch_direction_t::NONE}; } } @@ -1158,10 +1152,10 @@ struct deterministic_diving_policy_t void on_node_completed(mip_node_t* node, node_status_t status, - rounding_direction_t dir) override + branch_direction_t dir) override { if (status == node_status_t::HAS_CHILDREN) { - if (dir == rounding_direction_t::UP) { + if (dir == branch_direction_t::UP) { stack.push_front(node->get_down_child()); stack.push_front(node->get_up_child()); } else { @@ -1180,7 +1174,7 @@ struct deterministic_diving_policy_t template template -std::pair branch_and_bound_t::update_tree_impl( +std::pair branch_and_bound_t::update_tree_impl( mip_node_t* node_ptr, search_tree_t& search_tree, WorkerT* worker, @@ -1192,7 +1186,7 @@ std::pair 
branch_and_bound_t::upd lp_solution_t& leaf_solution = worker->leaf_solution; const f_t upper_bound = policy.upper_bound(); node_status_t status = node_status_t::PENDING; - rounding_direction_t round_dir = rounding_direction_t::NONE; + branch_direction_t round_dir = branch_direction_t::NONE; worker->recompute_basis = true; worker->recompute_bounds = true; @@ -1253,7 +1247,7 @@ std::pair branch_and_bound_t::upd assert(node_ptr->vstatus.size() == leaf_problem.num_cols); assert(branch_var >= 0); - assert(dir != rounding_direction_t::NONE); + assert(dir != branch_direction_t::NONE); policy.update_objective_estimate(node_ptr, leaf_fractional, leaf_solution.x); worker->recompute_basis = false; @@ -1294,7 +1288,7 @@ std::pair branch_and_bound_t::upd } template -std::pair branch_and_bound_t::update_tree( +std::pair branch_and_bound_t::update_tree( mip_node_t* node_ptr, search_tree_t& search_tree, branch_and_bound_worker_t* worker, @@ -1387,7 +1381,7 @@ dual::status_t branch_and_bound_t::solve_node_lp( node_ptr->node_id, node_ptr->depth, node_ptr->branch_var, - node_ptr->branch_dir == rounding_direction_t::DOWN ? "DOWN" : "UP", + node_ptr->branch_dir == branch_direction_t::DOWN ? 
"DOWN" : "UP", node_ptr->fractional_val, node_ptr->branch_var_lower, node_ptr->branch_var_upper, @@ -1521,7 +1515,7 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_tget_down_child()); } else { @@ -1633,7 +1627,7 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t worker->recompute_bounds = node_status != node_status_t::HAS_CHILDREN; if (node_status == node_status_t::HAS_CHILDREN) { - if (round_dir == rounding_direction_t::UP) { + if (round_dir == branch_direction_t::UP) { stack.push_front(node_ptr->get_down_child()); stack.push_front(node_ptr->get_up_child()); } else { @@ -2588,7 +2582,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } // Choose variable to branch on - i_t branch_var = pc_.variable_selection(fractional, root_relax_soln_.x, log); + i_t branch_var = pc_.variable_selection(fractional, root_relax_soln_.x); search_tree_.root = std::move(mip_node_t(root_objective_, root_vstatus_)); search_tree_.num_nodes = 0; diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 6a9269bb2f..c202b484f9 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -107,7 +107,7 @@ class branch_and_bound_t { } } - // Set a solution based on the user problem during solve time + // Set a solution based on the user problem during the course of the solve void set_new_solution(const std::vector& solution); // This queues the solution to be processed at the correct work unit timestamp @@ -318,7 +318,7 @@ class branch_and_bound_t { // Policy-based tree update shared between opportunistic and deterministic codepaths. template - std::pair update_tree_impl( + std::pair update_tree_impl( mip_node_t* node_ptr, search_tree_t& search_tree, WorkerT* worker, @@ -326,7 +326,7 @@ class branch_and_bound_t { Policy& policy); // Opportunistic tree update wrapper. 
- std::pair update_tree( + std::pair update_tree( mip_node_t* node_ptr, search_tree_t& search_tree, branch_and_bound_worker_t* worker, diff --git a/cpp/src/branch_and_bound/constants.hpp b/cpp/src/branch_and_bound/constants.hpp index ab8677095c..39bfa0bf3a 100644 --- a/cpp/src/branch_and_bound/constants.hpp +++ b/cpp/src/branch_and_bound/constants.hpp @@ -24,7 +24,7 @@ enum search_strategy_t : int { COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) }; -enum class rounding_direction_t { NONE = -1, DOWN = 0, UP = 1 }; +enum class branch_direction_t { NONE = -1, DOWN = 0, UP = 1 }; enum class branch_and_bound_mode_t { PARALLEL = 0, DETERMINISTIC = 1 }; diff --git a/cpp/src/branch_and_bound/deterministic_workers.hpp b/cpp/src/branch_and_bound/deterministic_workers.hpp index a5c3769126..53d7e4ef65 100644 --- a/cpp/src/branch_and_bound/deterministic_workers.hpp +++ b/cpp/src/branch_and_bound/deterministic_workers.hpp @@ -90,7 +90,9 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t { const std::vector& var_types, const simplex_solver_settings_t& settings, const std::string& context_name) - : base_t(id, original_lp, Arow, var_types, settings), work_context(context_name), pc_snapshot(1) + : base_t(id, original_lp, Arow, var_types, settings), + work_context(context_name), + pc_snapshot(1, settings) { work_context.deterministic = true; } @@ -156,7 +158,7 @@ class deterministic_bfs_worker_t mip_node_t* enqueue_children_for_plunge(mip_node_t* down_child, mip_node_t* up_child, - rounding_direction_t preferred_direction) + branch_direction_t preferred_direction) { if (!plunge_stack.empty()) { backlog.push(plunge_stack.back()); @@ -169,7 +171,7 @@ class deterministic_bfs_worker_t up_child->creation_seq = next_creation_seq++; mip_node_t* first_child; - if (preferred_direction == rounding_direction_t::UP) { + if (preferred_direction == branch_direction_t::UP) { plunge_stack.push_front(down_child); plunge_stack.push_front(up_child); first_child = up_child; 
diff --git a/cpp/src/branch_and_bound/diving_heuristics.cpp b/cpp/src/branch_and_bound/diving_heuristics.cpp index 571027c1d7..ec47307976 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.cpp +++ b/cpp/src/branch_and_bound/diving_heuristics.cpp @@ -17,26 +17,26 @@ branch_variable_t line_search_diving(const std::vector& fractional, const std::vector& root_solution, logger_t& log) { - constexpr f_t eps = 1e-6; - i_t branch_var = -1; - f_t min_score = std::numeric_limits::max(); - rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + i_t branch_var = -1; + f_t min_score = std::numeric_limits::max(); + branch_direction_t round_dir = branch_direction_t::NONE; for (i_t j : fractional) { - f_t score = inf; - rounding_direction_t dir = rounding_direction_t::NONE; + f_t score = inf; + branch_direction_t dir = branch_direction_t::NONE; if (solution[j] < root_solution[j] - eps) { f_t f = solution[j] - std::floor(solution[j]); f_t d = root_solution[j] - solution[j]; score = f / d; - dir = rounding_direction_t::DOWN; + dir = branch_direction_t::DOWN; } else if (solution[j] > root_solution[j] + eps) { f_t f = std::ceil(solution[j]) - solution[j]; f_t d = solution[j] - root_solution[j]; score = f / d; - dir = rounding_direction_t::UP; + dir = branch_direction_t::UP; } if (min_score > score) { @@ -48,12 +48,12 @@ branch_variable_t line_search_diving(const std::vector& fractional, // If the current solution is equal to the root solution, arbitrarily // set the branch variable to the first fractional variable and round it down - if (round_dir == rounding_direction_t::NONE) { + if (round_dir == branch_direction_t::NONE) { branch_var = fractional[0]; - round_dir = rounding_direction_t::DOWN; + round_dir = branch_direction_t::DOWN; } - assert(round_dir != rounding_direction_t::NONE); + assert(round_dir != branch_direction_t::NONE); assert(branch_var >= 0); log.debug("Line search diving: selected var %d with val = %e, round dir = %d and score = 
%e\n", @@ -73,50 +73,43 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, logger_t& log) { const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; + if (num_fractional == 0) return {-1, branch_direction_t::NONE}; - pseudo_cost_averages_t avgs = pc.compute_averages(); + pseudo_cost_averages_t averages = pc.compute_averages(); - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - rounding_direction_t round_dir = rounding_direction_t::DOWN; - constexpr f_t eps = f_t(1e-6); + i_t branch_var = fractional[0]; + f_t max_score = std::numeric_limits::lowest(); + branch_direction_t round_dir = branch_direction_t::DOWN; + constexpr f_t eps = f_t(1e-6); for (i_t j : fractional) { - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t pc_down = pc.pseudo_cost_num_down[j] != 0 - ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] - : avgs.down_avg; - f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] - : avgs.up_avg; + auto [f_up, f_down, pc_up, pc_down] = pc.get_pseudocost(j, solution, averages); + f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); + f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); - f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); - f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); - - f_t score = 0; - rounding_direction_t dir = rounding_direction_t::DOWN; + f_t score = 0; + branch_direction_t dir = branch_direction_t::DOWN; f_t root_val = (j < static_cast(root_solution.size())) ? 
root_solution[j] : solution[j]; if (solution[j] < root_val - f_t(0.4)) { score = score_down; - dir = rounding_direction_t::DOWN; + dir = branch_direction_t::DOWN; } else if (solution[j] > root_val + f_t(0.4)) { score = score_up; - dir = rounding_direction_t::UP; + dir = branch_direction_t::UP; } else if (f_down < f_t(0.3)) { score = score_down; - dir = rounding_direction_t::DOWN; + dir = branch_direction_t::DOWN; } else if (f_down > f_t(0.7)) { score = score_up; - dir = rounding_direction_t::UP; + dir = branch_direction_t::UP; } else if (pc_down < pc_up + eps) { score = score_down; - dir = rounding_direction_t::DOWN; + dir = branch_direction_t::DOWN; } else { score = score_up; - dir = rounding_direction_t::UP; + dir = branch_direction_t::UP; } if (score > max_score) { @@ -126,9 +119,9 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, } } - if (round_dir == rounding_direction_t::NONE) { + if (round_dir == branch_direction_t::NONE) { branch_var = fractional[0]; - round_dir = rounding_direction_t::DOWN; + round_dir = branch_direction_t::DOWN; } return {branch_var, round_dir}; @@ -142,30 +135,24 @@ branch_variable_t guided_diving(pseudo_costs_t& pc, logger_t& log) { const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; + if (num_fractional == 0) return {-1, branch_direction_t::NONE}; - pseudo_cost_averages_t avgs = pc.compute_averages(); + pseudo_cost_averages_t averages = pc.compute_averages(); - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - rounding_direction_t round_dir = rounding_direction_t::DOWN; - constexpr f_t eps = f_t(1e-6); + i_t branch_var = fractional[0]; + f_t max_score = std::numeric_limits::lowest(); + branch_direction_t round_dir = branch_direction_t::DOWN; + constexpr f_t eps = f_t(1e-6); for (i_t j : fractional) { - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; f_t down_dist = 
std::abs(incumbent[j] - std::floor(solution[j])); f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); - rounding_direction_t dir = - down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP; - - f_t pc_down = pc.pseudo_cost_num_down[j] != 0 - ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] - : avgs.down_avg; - f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] - : avgs.up_avg; - f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; - f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; + branch_direction_t dir = + down_dist < up_dist + eps ? branch_direction_t::DOWN : branch_direction_t::UP; + + auto [f_up, f_down, pc_up, pc_down] = pc.get_pseudocost(j, solution, averages); + f_t score1 = dir == branch_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; + f_t score2 = dir == branch_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; f_t score = (score1 + score2) / 6; if (score > max_score) { @@ -209,10 +196,10 @@ branch_variable_t coefficient_diving(const lp_problem_t& lp_probl const std::vector& down_locks, logger_t& log) { - i_t branch_var = -1; - i_t min_locks = std::numeric_limits::max(); - rounding_direction_t round_dir = rounding_direction_t::NONE; - constexpr f_t eps = 1e-6; + i_t branch_var = -1; + i_t min_locks = std::numeric_limits::max(); + branch_direction_t round_dir = branch_direction_t::NONE; + constexpr f_t eps = 1e-6; for (i_t j : fractional) { f_t f_down = solution[j] - std::floor(solution[j]); @@ -230,18 +217,18 @@ branch_variable_t coefficient_diving(const lp_problem_t& lp_probl branch_var = j; if (up_lock < down_lock) { - round_dir = rounding_direction_t::UP; + round_dir = branch_direction_t::UP; } else if (up_lock > down_lock) { - round_dir = rounding_direction_t::DOWN; + round_dir = branch_direction_t::DOWN; } else if (f_down < f_up + eps) { - round_dir = 
rounding_direction_t::DOWN; + round_dir = branch_direction_t::DOWN; } else { - round_dir = rounding_direction_t::UP; + round_dir = branch_direction_t::UP; } } } - assert(round_dir != rounding_direction_t::NONE); + assert(round_dir != branch_direction_t::NONE); assert(branch_var >= 0); log.debug( diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp index cce23c3bd7..694a7099c4 100644 --- a/cpp/src/branch_and_bound/mip_node.hpp +++ b/cpp/src/branch_and_bound/mip_node.hpp @@ -68,7 +68,7 @@ class mip_node_t { parent(nullptr), node_id(0), branch_var(-1), - branch_dir(rounding_direction_t::NONE), + branch_dir(branch_direction_t::NONE), branch_var_lower(-std::numeric_limits::infinity()), branch_var_upper(std::numeric_limits::infinity()), fractional_val(std::numeric_limits::infinity()), @@ -86,7 +86,7 @@ class mip_node_t { parent(nullptr), node_id(0), branch_var(-1), - branch_dir(rounding_direction_t::NONE), + branch_dir(branch_direction_t::NONE), integer_infeasible(-1), objective_estimate(std::numeric_limits::infinity()), vstatus(basis) @@ -99,7 +99,7 @@ class mip_node_t { mip_node_t* parent_node, i_t node_num, i_t branch_variable, - rounding_direction_t branch_direction, + branch_direction_t branch_direction, f_t branch_var_value, i_t integer_inf, const std::vector& basis) @@ -115,10 +115,10 @@ class mip_node_t { objective_estimate(parent_node->objective_estimate), vstatus(basis) { - branch_var_lower = branch_direction == rounding_direction_t::DOWN ? problem.lower[branch_var] - : std::ceil(branch_var_value); - branch_var_upper = branch_direction == rounding_direction_t::DOWN ? std::floor(branch_var_value) - : problem.upper[branch_var]; + branch_var_lower = branch_direction == branch_direction_t::DOWN ? problem.lower[branch_var] + : std::ceil(branch_var_value); + branch_var_upper = branch_direction == branch_direction_t::DOWN ? 
std::floor(branch_var_value) + : problem.upper[branch_var]; children[0] = nullptr; children[1] = nullptr; } @@ -286,7 +286,7 @@ class mip_node_t { i_t depth; i_t node_id; i_t branch_var; - rounding_direction_t branch_dir; + branch_direction_t branch_dir; f_t branch_var_lower; f_t branch_var_upper; f_t fractional_val; @@ -316,7 +316,7 @@ class mip_node_t { const mip_node_t* node = this; while (node != nullptr && node->branch_var >= 0) { uint64_t step = static_cast(node->branch_var) << 1; - step |= (node->branch_dir == rounding_direction_t::UP) ? 1 : 0; + step |= (node->branch_dir == branch_direction_t::UP) ? 1 : 0; path_steps.push_back(step); node = node->parent; } @@ -363,7 +363,7 @@ class search_tree_t { parent_node, ++id, branch_var, - rounding_direction_t::DOWN, + branch_direction_t::DOWN, fractional_val, integer_infeasible, parent_vstatus); @@ -371,14 +371,14 @@ class search_tree_t { parent_node, down_child.get(), branch_var, - rounding_direction_t::DOWN, + branch_direction_t::DOWN, std::floor(fractional_val)); auto up_child = std::make_unique>(original_lp, parent_node, ++id, branch_var, - rounding_direction_t::UP, + branch_direction_t::UP, fractional_val, integer_infeasible, parent_vstatus); @@ -387,7 +387,7 @@ class search_tree_t { parent_node, up_child.get(), branch_var, - rounding_direction_t::UP, + branch_direction_t::UP, std::ceil(fractional_val)); assert(parent_vstatus.size() == original_lp.num_cols); @@ -409,7 +409,7 @@ class search_tree_t { const mip_node_t* origin_ptr, const mip_node_t* dest_ptr, const i_t branch_var, - rounding_direction_t branch_dir, + branch_direction_t branch_dir, const f_t bound) { if (write_graphviz) { @@ -417,7 +417,7 @@ class search_tree_t { origin_ptr->node_id, dest_ptr->node_id, branch_var, - branch_dir == rounding_direction_t::DOWN ? "<=" : ">=", + branch_dir == branch_direction_t::DOWN ? 
"<=" : ">=", bound); } } diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index cf67a69046..6e2f5cf4d9 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -897,9 +897,9 @@ static void batch_pdlp_reliability_branching_task( std::vector& pdlp_obj_down, std::vector& pdlp_obj_up) { - log.printf(rb_mode == 2 ? "RB batch PDLP only for %d candidates\n" - : "RB cooperative batch PDLP and DS for %d candidates\n", - num_candidates); + log.debug(rb_mode == 2 ? "RB batch PDLP only for %d candidates\n" + : "RB cooperative batch PDLP and DS for %d candidates\n", + num_candidates); f_t start_batch = tic(); @@ -961,7 +961,7 @@ static void batch_pdlp_reliability_branching_task( if (solutions.get_additional_termination_informations().size() != static_cast(num_candidates) * 2) { - log.printf("RB batch PDLP failed and produced no solutions\n"); + log.debug("RB batch PDLP failed and produced no solutions\n"); return; } @@ -972,10 +972,10 @@ static void batch_pdlp_reliability_branching_task( } } - log.printf("RB batch PDLP completed in %.2fs. Solved %d/%d\n", - batch_pdlp_time, - amount_done, - num_candidates * 2); + log.debug("RB batch PDLP completed in %.2fs. Solved %d/%d\n", + batch_pdlp_time, + amount_done, + num_candidates * 2); for (i_t k = 0; k < num_candidates; k++) { if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { @@ -1240,19 +1240,18 @@ void strong_branching(const lp_problem_t& original_lp, } template -f_t pseudo_costs_t::calculate_pseudocost_score( +pseudo_cost_value_t pseudo_costs_t::get_pseudocost( i_t j, const std::vector& solution, pseudo_cost_averages_t averages) const { - constexpr f_t eps = 1e-6; - i_t num_up = pseudo_cost_num_up[j]; - f_t sum_up = pseudo_cost_sum_up[j]; - i_t num_down = pseudo_cost_num_down[j]; - f_t sum_down = pseudo_cost_sum_down[j]; - f_t pc_up = num_up > 0 ? 
sum_up / num_up : averages.up_avg; - f_t pc_down = num_down > 0 ? sum_down / num_down : averages.down_avg; - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + i_t num_up = pseudo_cost_num_up[j]; + f_t sum_up = pseudo_cost_sum_up[j]; + i_t num_down = pseudo_cost_num_down[j]; + f_t sum_down = pseudo_cost_sum_down[j]; + f_t pc_up = num_up > 0 ? sum_up / num_up : averages.up_avg; + f_t pc_down = num_down > 0 ? sum_down / num_down : averages.down_avg; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + return {.f_up = f_up, .f_down = f_down, .pc_up = pc_up, .pc_down = pc_down}; } template @@ -1260,11 +1259,11 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t f_t leaf_objective) { const f_t change_in_obj = std::max(leaf_objective - node_ptr->lower_bound, 0.0); - const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN + const f_t frac = node_ptr->branch_dir == branch_direction_t::DOWN ? 
node_ptr->fractional_val - std::floor(node_ptr->fractional_val) : std::ceil(node_ptr->fractional_val) - node_ptr->fractional_val; - if (node_ptr->branch_dir == rounding_direction_t::DOWN) { + if (node_ptr->branch_dir == branch_direction_t::DOWN) { pseudo_cost_sum_down[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_down[node_ptr->branch_var]++; } else { @@ -1303,21 +1302,22 @@ pseudo_cost_averages_t pseudo_costs_t::compute_aver template i_t pseudo_costs_t::variable_selection(const std::vector& fractional, - const std::vector& solution, - logger_t& log) + const std::vector& solution) { + constexpr f_t eps = 1e-6; i_t branch_var = fractional[0]; f_t max_score = -1; pseudo_cost_averages_t averages = compute_averages(); - log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - averages.num_init_down, - averages.num_init_up, - averages.down_avg, - averages.up_avg); + settings.log.debug("PC: num initialized down %d up %d avg down %e up %e\n", + averages.num_init_down, + averages.num_init_up, + averages.down_avg, + averages.up_avg); for (i_t j : fractional) { - f_t score = calculate_pseudocost_score(j, solution, averages); + auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, solution, averages); + f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); if (score > max_score) { max_score = score; @@ -1325,10 +1325,10 @@ i_t pseudo_costs_t::variable_selection(const std::vector } } - log.debug("Pseudocost branching on %d. Value %e. Score %e.\n", - branch_var, - solution[branch_var], - max_score); + settings.log.debug("Pseudocost branching on %d. Value %e. 
Score %e.\n", + branch_var, + solution[branch_var], + max_score); return branch_var; } @@ -1340,10 +1340,8 @@ i_t pseudo_costs_t::reliable_variable_selection( branch_and_bound_worker_t* worker, const std::vector& var_types, const branch_and_bound_stats_t& bnb_stats, - const simplex_solver_settings_t& settings, f_t upper_bound, int max_num_tasks, - logger_t& log, const std::vector& new_slacks, const lp_problem_t& original_lp) { @@ -1385,11 +1383,11 @@ i_t pseudo_costs_t::reliable_variable_selection( // method). if (reliable_threshold == 0) { averages = compute_averages(); - log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - averages.num_init_down, - averages.num_init_up, - averages.down_avg, - averages.up_avg); + settings.log.debug("PC: num initialized down %d up %d avg down %e up %e\n", + averages.num_init_down, + averages.num_init_up, + averages.down_avg, + averages.up_avg); } std::vector> unreliable_list; @@ -1401,7 +1399,8 @@ i_t pseudo_costs_t::reliable_variable_selection( unreliable_list.push_back(std::make_pair(-1, j)); continue; } - f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); + auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, leaf_solution.x, averages); + f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); if (score > max_score) { max_score = score; @@ -1410,10 +1409,10 @@ i_t pseudo_costs_t::reliable_variable_selection( } if (unreliable_list.empty()) { - log.printf("pc branching on %d. Value %e. Score %e\n", - branch_var, - leaf_solution.x[branch_var], - max_score); + settings.log.debug("pc branching on %d. Value %e. 
Score %e\n", + branch_var, + leaf_solution.x[branch_var], + max_score); return branch_var; } @@ -1436,18 +1435,18 @@ i_t pseudo_costs_t::reliable_variable_selection( min_percent_solved_by_batch_pdlp_at_root_for_pdlp); if (rb_mode != 0 && !pdlp_warm_cache->populated) { - log.printf("PDLP warm start data not populated, using DS only\n"); + settings.log.debug("PDLP warm start data not populated, using DS only\n"); } else if (rb_mode != 0 && settings.sub_mip) { - log.printf("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n"); + settings.log.debug("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n"); } else if (rb_mode != 0 && settings.deterministic) { - log.printf( + settings.log.debug( "Batch PDLP reliability branching is disabled because deterministic mode is enabled\n"); } else if (rb_mode != 0 && unreliable_list.size() < min_num_candidates_for_pdlp) { - log.printf("Not enough candidates to use batch PDLP, using DS only\n"); + settings.log.debug("Not enough candidates to use batch PDLP, using DS only\n"); } else if (rb_mode != 0 && pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root < 5.0) { - log.printf("Percent solved by batch PDLP at root is too low, using DS only\n"); + settings.log.debug("Percent solved by batch PDLP at root is too low, using DS only\n"); } else if (use_pdlp) { - log.printf( + settings.log.debug( "Using batch PDLP because populated, unreliable list size is %d (> %d), and percent solved " "by batch PDLP at root is %f%% (> %f%%)\n", static_cast(unreliable_list.size()), @@ -1468,7 +1467,7 @@ i_t pseudo_costs_t::reliable_variable_selection( assert(num_candidates > 0); assert(num_tasks > 0); - log.printf( + settings.log.debug( "RB iters = %d, B&B iters = %d, unreliable = %d, num_tasks = %d, reliable_threshold = %d\n", static_cast(strong_branching_lp_iter), branch_and_bound_lp_iters, @@ -1517,7 +1516,8 @@ i_t pseudo_costs_t::reliable_variable_selection( score = std::max(estimate.up_obj_change, eps) * 
std::max(estimate.down_obj_change, eps); } else { // Use the previous score, even if it is unreliable - score = calculate_pseudocost_score(j, leaf_solution.x, averages); + auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, leaf_solution.x, averages); + score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); } } } else { @@ -1555,7 +1555,7 @@ i_t pseudo_costs_t::reliable_variable_selection( if (use_pdlp) { #pragma omp task default(shared) - batch_pdlp_reliability_branching_task(log, + batch_pdlp_reliability_branching_task(settings.log, rb_mode, num_candidates, start_time, @@ -1573,7 +1573,7 @@ i_t pseudo_costs_t::reliable_variable_selection( } if (toc(start_time) > settings.time_limit) { - log.printf("Time limit reached\n"); + settings.log.debug("Time limit reached\n"); if (use_pdlp) { concurrent_halt.store(1); #pragma omp taskwait @@ -1603,7 +1603,7 @@ i_t pseudo_costs_t::reliable_variable_selection( if (toc(start_time) > settings.time_limit) { continue; } if (rb_mode == 1 && sb_view.is_solved(i)) { - log.printf( + settings.log.debug( "DS skipping variable %d branch down (shared_idx %d): already solved by PDLP\n", j, i); } else { pseudo_cost_mutex_down[j].lock(); @@ -1648,9 +1648,10 @@ i_t pseudo_costs_t::reliable_variable_selection( const i_t shared_idx = i + num_candidates; if (rb_mode == 1 && sb_view.is_solved(shared_idx)) { - log.printf("DS skipping variable %d branch up (shared_idx %d): already solved by PDLP\n", - j, - shared_idx); + settings.log.debug( + "DS skipping variable %d branch up (shared_idx %d): already solved by PDLP\n", + j, + shared_idx); } else { pseudo_cost_mutex_up[j].lock(); if (pseudo_cost_num_up[j] < reliable_threshold) { @@ -1691,7 +1692,9 @@ i_t pseudo_costs_t::reliable_variable_selection( if (toc(start_time) > settings.time_limit) { continue; } - score = calculate_pseudocost_score(j, leaf_solution.x, averages); + auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, leaf_solution.x, averages); + score = 
std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + score_mutex.lock(); if (score > max_score) { max_score = score; @@ -1708,14 +1711,14 @@ i_t pseudo_costs_t::reliable_variable_selection( // TODO put back // if (rb_mode != 2) { // if (rb_mode == 1) { - // log.printf( + // settings.log.debug( // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed, %d skipped // (PDLP) in %.2fs\n", num_candidates, dual_simplex_optimal.load(), num_candidates * 2, // dual_simplex_infeasible.load(), num_candidates * 2, // dual_simplex_failed.load(), num_candidates * 2, // dual_simplex_skipped.load(), dual_simplex_elapsed); // } else { - // log.printf( + // settings.log.debug( // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed in // %.2fs\n", num_candidates, dual_simplex_optimal.load(), num_candidates * 2, // dual_simplex_infeasible.load(), num_candidates * 2, dual_simplex_failed.load(), @@ -1771,21 +1774,23 @@ i_t pseudo_costs_t::reliable_variable_selection( } } - f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); + auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, leaf_solution.x, averages); + f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + if (score > max_score) { max_score = score; branch_var = j; } } - log.printf("RB batch PDLP: %d candidates, %d/%d optimal, %d applied to pseudo-costs\n", - num_candidates, - pdlp_optimal, - num_candidates * 2, - pdlp_applied); + settings.log.debug("RB batch PDLP: %d candidates, %d/%d optimal, %d applied to pseudo-costs\n", + num_candidates, + pdlp_optimal, + num_candidates * 2, + pdlp_applied); } - log.printf( + settings.log.debug( "pc branching on %d. Value %e. 
Score %e\n", branch_var, leaf_solution.x[branch_var], max_score); return branch_var; @@ -1794,8 +1799,7 @@ i_t pseudo_costs_t::reliable_variable_selection( template f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, const std::vector& solution, - f_t lower_bound, - logger_t& log) + f_t lower_bound) { const i_t num_fractional = fractional.size(); f_t estimate = lower_bound; @@ -1803,17 +1807,12 @@ f_t pseudo_costs_t::obj_estimate(const std::vector& frac pseudo_cost_averages_t averages = compute_averages(); for (i_t j : fractional) { - constexpr f_t eps = 1e-6; - i_t num_up = pseudo_cost_num_up[j]; - i_t num_down = pseudo_cost_num_down[j]; - f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : averages.up_avg; - f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : averages.down_avg; - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; + constexpr f_t eps = 1e-6; + auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, solution, averages); estimate += std::min(pc_down * f_down, pc_up * f_up); } - log.printf("pseudocost estimate = %e\n", estimate); + settings.log.debug("pseudocost estimate = %e\n", estimate); return estimate; } diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 9635a6ca7f..7418055dd1 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -69,7 +69,7 @@ struct reliability_branching_settings_t { template struct branch_variable_t { i_t variable; - rounding_direction_t direction; + branch_direction_t direction; }; template @@ -92,10 +92,18 @@ struct pseudo_cost_averages_t { i_t num_init_up; }; +template +struct pseudo_cost_value_t { + f_t f_up; + f_t f_down; + f_t pc_up; + f_t pc_down; +}; + template struct pseudo_cost_update_t { i_t variable; - rounding_direction_t direction; + branch_direction_t direction; f_t delta; double work_timestamp; int worker_id; @@ -137,8 
+145,9 @@ class pseudo_costs_t { using mutex_type = std::conditional_t; - explicit pseudo_costs_t(i_t num_variables) - : pseudo_cost_sum_down(num_variables), + explicit pseudo_costs_t(i_t num_variables, const simplex_solver_settings_t& settings) + : settings(settings), + pseudo_cost_sum_down(num_variables), pseudo_cost_sum_up(num_variables), pseudo_cost_num_down(num_variables), pseudo_cost_num_up(num_variables), @@ -154,7 +163,7 @@ class pseudo_costs_t { void merge_updates(const std::vector>& updates) { for (const auto& upd : updates) { - if (upd.direction == rounding_direction_t::DOWN) { + if (upd.direction == branch_direction_t::DOWN) { pseudo_cost_sum_down[upd.variable] += upd.delta; pseudo_cost_num_down[upd.variable]++; } else { @@ -178,22 +187,17 @@ class pseudo_costs_t { f_t obj_estimate(const std::vector& fractional, const std::vector& solution, - f_t lower_bound, - logger_t& log); + f_t lower_bound); - i_t variable_selection(const std::vector& fractional, - const std::vector& solution, - logger_t& log); + i_t variable_selection(const std::vector& fractional, const std::vector& solution); i_t reliable_variable_selection(const mip_node_t* node_ptr, const std::vector& fractional, branch_and_bound_worker_t* worker, const std::vector& var_types, const branch_and_bound_stats_t& bnb_stats, - const simplex_solver_settings_t& settings, f_t upper_bound, int max_num_tasks, - logger_t& log, const std::vector& new_slacks, const lp_problem_t& original_lp); @@ -208,11 +212,12 @@ class pseudo_costs_t { detail::compute_hash(pseudo_cost_num_down) ^ detail::compute_hash(pseudo_cost_num_up); } - f_t calculate_pseudocost_score(i_t j, - const std::vector& solution, - pseudo_cost_averages_t averages) const; + pseudo_cost_value_t get_pseudocost(i_t j, + const std::vector& solution, + pseudo_cost_averages_t averages) const; reliability_branching_settings_t reliability_branching_settings; + simplex_solver_settings_t settings; std::shared_ptr> AT; // Transpose of the constraint 
matrix A std::vector pseudo_cost_sum_up; @@ -233,32 +238,33 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { public: using Base = pseudo_costs_t; - pseudo_cost_snapshot_t(i_t num_variables) : Base(num_variables) {}; + pseudo_cost_snapshot_t(i_t num_variables, const simplex_solver_settings_t& settings) + : Base(num_variables, settings) {}; pseudo_cost_snapshot_t(const pseudo_costs_t& other) - : Base(1) + : Base(1, other.settings) { *this = other; } - pseudo_cost_snapshot_t(const Base& other) : Base(1) { *this = other; } + pseudo_cost_snapshot_t(const Base& other) : Base(1, other.settings) { *this = other; } pseudo_cost_snapshot_t& operator=( const pseudo_costs_t& other) { - Base::AT = other.AT; - Base::pdlp_warm_cache = other.pdlp_warm_cache; + this->AT = other.AT; + this->pdlp_warm_cache = other.pdlp_warm_cache; i_t n = other.pseudo_cost_num_down.size(); - Base::pseudo_cost_num_down.resize(n); - Base::pseudo_cost_num_up.resize(n); - Base::pseudo_cost_sum_down.resize(n); - Base::pseudo_cost_sum_up.resize(n); + this->pseudo_cost_num_down.resize(n); + this->pseudo_cost_num_up.resize(n); + this->pseudo_cost_sum_down.resize(n); + this->pseudo_cost_sum_up.resize(n); for (i_t i = 0; i < n; ++i) { - Base::pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].underlying(); - Base::pseudo_cost_num_up[i] = other.pseudo_cost_num_up[i].underlying(); - Base::pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].underlying(); - Base::pseudo_cost_sum_up[i] = other.pseudo_cost_sum_up[i].underlying(); + this->pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].underlying(); + this->pseudo_cost_num_up[i] = other.pseudo_cost_num_up[i].underlying(); + this->pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].underlying(); + this->pseudo_cost_sum_up[i] = other.pseudo_cost_sum_up[i].underlying(); } return *this; @@ -267,26 +273,26 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { pseudo_cost_snapshot_t& operator=(const Base& other) { if (this != &other) { - 
Base::AT = other.AT; - Base::pdlp_warm_cache = other.pdlp_warm_cache; - Base::pseudo_cost_num_down = other.pseudo_cost_num_down; - Base::pseudo_cost_num_up = other.pseudo_cost_num_up; - Base::pseudo_cost_sum_down = other.pseudo_cost_sum_down; - Base::pseudo_cost_sum_up = other.pseudo_cost_sum_up; + this->AT = other.AT; + this->pdlp_warm_cache = other.pdlp_warm_cache; + this->pseudo_cost_num_down = other.pseudo_cost_num_down; + this->pseudo_cost_num_up = other.pseudo_cost_num_up; + this->pseudo_cost_sum_down = other.pseudo_cost_sum_down; + this->pseudo_cost_sum_up = other.pseudo_cost_sum_up; } return *this; }; void queue_update( - i_t variable, rounding_direction_t direction, f_t delta, double clock, int worker_id) + i_t variable, branch_direction_t direction, f_t delta, double clock, int worker_id) { updates_.push_back({variable, direction, delta, clock, worker_id}); - if (direction == rounding_direction_t::DOWN) { - Base::pseudo_cost_sum_down[variable] += delta; - ++Base::pseudo_cost_num_down[variable]; + if (direction == branch_direction_t::DOWN) { + this->pseudo_cost_sum_down[variable] += delta; + ++this->pseudo_cost_num_down[variable]; } else { - Base::pseudo_cost_sum_up[variable] += delta; - ++Base::pseudo_cost_num_up[variable]; + this->pseudo_cost_sum_up[variable] += delta; + ++this->pseudo_cost_num_up[variable]; } } @@ -297,7 +303,7 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { return result; } - i_t n_vars() const { return Base::pseudo_cost_sum_down.size(); } + i_t n_vars() const { return this->pseudo_cost_sum_down.size(); } private: std::vector> updates_; From 8b27e1b7ce486881eb163dc9ac58c3d41f586c0e Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 1 May 2026 07:45:47 +0200 Subject: [PATCH 48/53] benchmarking the impact of atomics and mutexes in the deterministic mode Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/pseudo_costs.hpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 7418055dd1..1f801a41f9 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -131,19 +131,15 @@ class pseudo_costs_t { public: // Define the types used for storing the pseudocost of each variable. // Disable or enable atomics depending on if we are in REGULAR or DETERMINISTIC modes - using float_type = - std::conditional_t, f_t>; + using float_type = omp_atomic_t; - using int_type = - std::conditional_t, i_t>; + using int_type = omp_atomic_t; // Counting the number of LP iterations might require more than an int32 can hold. - using int64_type = std:: - conditional_t, int64_t>; + using int64_type = omp_atomic_t; // Disable or enable mutexes depending on if we are in REGULAR or DETERMINISTIC modes - using mutex_type = - std::conditional_t; + using mutex_type = omp_mutex_t; explicit pseudo_costs_t(i_t num_variables, const simplex_solver_settings_t& settings) : settings(settings), From 8a088f79b2c60f3136793e01da9bbcf6f1e78717 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 1 May 2026 16:43:13 +0200 Subject: [PATCH 49/53] revert changes to calculate_pseudocost Signed-off-by: Nicolas L. 
Guidotti --- .../branch_and_bound/diving_heuristics.cpp | 12 ++-- cpp/src/branch_and_bound/pseudo_costs.cpp | 58 +++++++------------ cpp/src/branch_and_bound/pseudo_costs.hpp | 17 ++++-- 3 files changed, 41 insertions(+), 46 deletions(-) diff --git a/cpp/src/branch_and_bound/diving_heuristics.cpp b/cpp/src/branch_and_bound/diving_heuristics.cpp index ec47307976..c0e31c7d89 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.cpp +++ b/cpp/src/branch_and_bound/diving_heuristics.cpp @@ -83,9 +83,11 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, constexpr f_t eps = f_t(1e-6); for (i_t j : fractional) { - auto [f_up, f_down, pc_up, pc_down] = pc.get_pseudocost(j, solution, averages); - f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); - f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + auto [pc_up, pc_down] = pc.get_pseudocost(j, averages); + f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); + f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); f_t score = 0; branch_direction_t dir = branch_direction_t::DOWN; @@ -145,12 +147,14 @@ branch_variable_t guided_diving(pseudo_costs_t& pc, constexpr f_t eps = f_t(1e-6); for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); branch_direction_t dir = down_dist < up_dist + eps ? branch_direction_t::DOWN : branch_direction_t::UP; - auto [f_up, f_down, pc_up, pc_down] = pc.get_pseudocost(j, solution, averages); + auto [pc_up, pc_down] = pc.get_pseudocost(j, averages); f_t score1 = dir == branch_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; f_t score2 = dir == branch_direction_t::DOWN ? 
pc_up * f_up : pc_down * f_down; f_t score = (score1 + score2) / 6; diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index 6e2f5cf4d9..8f68d4ba34 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -1241,7 +1241,7 @@ void strong_branching(const lp_problem_t& original_lp, template pseudo_cost_value_t pseudo_costs_t::get_pseudocost( - i_t j, const std::vector& solution, pseudo_cost_averages_t averages) const + i_t j, pseudo_cost_averages_t averages) const { i_t num_up = pseudo_cost_num_up[j]; f_t sum_up = pseudo_cost_sum_up[j]; @@ -1249,9 +1249,18 @@ pseudo_cost_value_t pseudo_costs_t::get_pseudocost( f_t sum_down = pseudo_cost_sum_down[j]; f_t pc_up = num_up > 0 ? sum_up / num_up : averages.up_avg; f_t pc_down = num_down > 0 ? sum_down / num_down : averages.down_avg; - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - return {.f_up = f_up, .f_down = f_down, .pc_up = pc_up, .pc_down = pc_down}; + return {pc_up, pc_down}; +} + +template +f_t pseudo_costs_t::calculate_pseudocost_score( + i_t j, const std::vector& solution, pseudo_cost_averages_t averages) const +{ + constexpr f_t eps = 1e-6; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + auto [pc_up, pc_down] = get_pseudocost(j, averages); + return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); } template @@ -1316,8 +1325,7 @@ i_t pseudo_costs_t::variable_selection(const std::vector averages.up_avg); for (i_t j : fractional) { - auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, solution, averages); - f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + f_t score = calculate_pseudocost_score(j, solution, averages); if (score > max_score) { max_score = score; @@ -1399,8 +1407,7 @@ i_t pseudo_costs_t::reliable_variable_selection( 
unreliable_list.push_back(std::make_pair(-1, j)); continue; } - auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, leaf_solution.x, averages); - f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); if (score > max_score) { max_score = score; @@ -1516,8 +1523,7 @@ i_t pseudo_costs_t::reliable_variable_selection( score = std::max(estimate.up_obj_change, eps) * std::max(estimate.down_obj_change, eps); } else { // Use the previous score, even if it is unreliable - auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, leaf_solution.x, averages); - score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + score = calculate_pseudocost_score(j, leaf_solution.x, averages); } } } else { @@ -1692,9 +1698,7 @@ i_t pseudo_costs_t::reliable_variable_selection( if (toc(start_time) > settings.time_limit) { continue; } - auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, leaf_solution.x, averages); - score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); - + score = calculate_pseudocost_score(j, leaf_solution.x, averages); score_mutex.lock(); if (score > max_score) { max_score = score; @@ -1708,24 +1712,6 @@ i_t pseudo_costs_t::reliable_variable_selection( f_t dual_simplex_elapsed = toc(dual_simplex_start_time); - // TODO put back - // if (rb_mode != 2) { - // if (rb_mode == 1) { - // settings.log.debug( - // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed, %d skipped - // (PDLP) in %.2fs\n", num_candidates, dual_simplex_optimal.load(), num_candidates * 2, - // dual_simplex_infeasible.load(), num_candidates * 2, - // dual_simplex_failed.load(), num_candidates * 2, - // dual_simplex_skipped.load(), dual_simplex_elapsed); - // } else { - // settings.log.debug( - // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed in - // %.2fs\n", num_candidates, dual_simplex_optimal.load(), num_candidates 
* 2, - // dual_simplex_infeasible.load(), num_candidates * 2, dual_simplex_failed.load(), - // num_candidates * 2, dual_simplex_elapsed); - // } - //} - if (use_pdlp) { #pragma omp taskwait @@ -1774,9 +1760,7 @@ i_t pseudo_costs_t::reliable_variable_selection( } } - auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, leaf_solution.x, averages); - f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); - + f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); if (score > max_score) { max_score = score; branch_var = j; @@ -1807,8 +1791,10 @@ f_t pseudo_costs_t::obj_estimate(const std::vector& frac pseudo_cost_averages_t averages = compute_averages(); for (i_t j : fractional) { - constexpr f_t eps = 1e-6; - auto [f_up, f_down, pc_up, pc_down] = get_pseudocost(j, solution, averages); + constexpr f_t eps = 1e-6; + auto [pc_up, pc_down] = get_pseudocost(j, averages); + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; estimate += std::min(pc_down * f_down, pc_up * f_up); } diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 1f801a41f9..4a3a72fb39 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -94,8 +94,6 @@ struct pseudo_cost_averages_t { template struct pseudo_cost_value_t { - f_t f_up; - f_t f_down; f_t pc_up; f_t pc_down; }; @@ -208,10 +206,13 @@ class pseudo_costs_t { detail::compute_hash(pseudo_cost_num_down) ^ detail::compute_hash(pseudo_cost_num_up); } - pseudo_cost_value_t get_pseudocost(i_t j, - const std::vector& solution, + pseudo_cost_value_t get_pseudocost(i_t variable, pseudo_cost_averages_t averages) const; + f_t calculate_pseudocost_score(i_t j, + const std::vector& solution, + pseudo_cost_averages_t averages) const; + reliability_branching_settings_t reliability_branching_settings; simplex_solver_settings_t settings; @@ -243,7 +244,11 @@ class 
pseudo_cost_snapshot_t : public pseudo_costs_t { *this = other; } - pseudo_cost_snapshot_t(const Base& other) : Base(1, other.settings) { *this = other; } + pseudo_cost_snapshot_t(const pseudo_cost_snapshot_t& other) : Base(1, other.settings) + { + *this = other; + } + pseudo_cost_snapshot_t& operator=( const pseudo_costs_t& other) { @@ -266,7 +271,7 @@ class pseudo_cost_snapshot_t : public pseudo_costs_t { return *this; } - pseudo_cost_snapshot_t& operator=(const Base& other) + pseudo_cost_snapshot_t& operator=(const pseudo_cost_snapshot_t& other) { if (this != &other) { this->AT = other.AT; From 5e21d9a0dbb25ee417bf6ee1cc1dd45d8827f77c Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 1 May 2026 17:08:29 +0200 Subject: [PATCH 50/53] break down long expression for batch pdlp Signed-off-by: Nicolas L. Guidotti --- cpp/src/branch_and_bound/pseudo_costs.cpp | 27 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index 8f68d4ba34..6f940e8f52 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -1426,6 +1426,7 @@ i_t pseudo_costs_t::reliable_variable_selection( // 0: no batch PDLP, 1: cooperative batch PDLP and DS, 2: batch PDLP only const i_t rb_mode = settings.mip_batch_pdlp_reliability_branching; + // We don't use batch PDLP in reliability branching if the PDLP warm start data was not filled // This indicates that PDLP alone (not batched) couldn't even run at the root node // So it will most likely perform poorly compared to DS @@ -1434,12 +1435,26 @@ i_t pseudo_costs_t::reliable_variable_selection( // using batch PDLP constexpr i_t min_num_candidates_for_pdlp = 5; constexpr f_t min_percent_solved_by_batch_pdlp_at_root_for_pdlp = 5.0; - // Batch PDLP is either forced or we use the heuristic to decide if it should be used - const bool use_pdlp = (rb_mode == 2) || (rb_mode != 0 && 
!settings.sub_mip && - !settings.deterministic && pdlp_warm_cache->populated && - unreliable_list.size() > min_num_candidates_for_pdlp && - pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root > - min_percent_solved_by_batch_pdlp_at_root_for_pdlp); + + // Check if batch PDLP was forced to be on + bool use_pdlp = rb_mode == 2; + + // Use the heuristic to decide if it should be used (in case it is set to automatic) + if (!use_pdlp && rb_mode != 0) { + // Check if it is a sub MIP or the determinism mode is on. + use_pdlp = !settings.sub_mip; + use_pdlp &= !settings.deterministic; + + // Check if the warm cache was filled at the root + use_pdlp &= pdlp_warm_cache->populated; + + // Check if there are enough candidates for batch PDLP + use_pdlp &= unreliable_list.size() > min_num_candidates_for_pdlp; + + // Check if batch PDLP was effective for strong branching at the root node + use_pdlp &= pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root > + min_percent_solved_by_batch_pdlp_at_root_for_pdlp; + } if (rb_mode != 0 && !pdlp_warm_cache->populated) { settings.log.debug("PDLP warm start data not populated, using DS only\n"); From ec4514f9d706e5e2c64a78b6820499cc92ea6130 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 1 May 2026 21:48:56 +0200 Subject: [PATCH 51/53] eliminated bnb mode in pseudocost. replace types into runtime checks Signed-off-by: Nicolas L. 
Guidotti --- .../branch_and_bound/diving_heuristics.cpp | 50 +-- .../branch_and_bound/diving_heuristics.hpp | 8 +- cpp/src/branch_and_bound/pseudo_costs.cpp | 375 ++++++++++++------ cpp/src/branch_and_bound/pseudo_costs.hpp | 161 +++----- 4 files changed, 340 insertions(+), 254 deletions(-) diff --git a/cpp/src/branch_and_bound/diving_heuristics.cpp b/cpp/src/branch_and_bound/diving_heuristics.cpp index c0e31c7d89..a0bb731c1e 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.cpp +++ b/cpp/src/branch_and_bound/diving_heuristics.cpp @@ -7,8 +7,6 @@ #include -#include - namespace cuopt::linear_programming::dual_simplex { template @@ -65,8 +63,8 @@ branch_variable_t line_search_diving(const std::vector& fractional, return {branch_var, round_dir}; } -template -branch_variable_t pseudocost_diving(pseudo_costs_t& pc, +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& root_solution, @@ -75,7 +73,8 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const i_t num_fractional = fractional.size(); if (num_fractional == 0) return {-1, branch_direction_t::NONE}; - pseudo_cost_averages_t averages = pc.compute_averages(); + f_t avg_down = pc.compute_pseudocost_average_down(); + f_t avg_up = pc.compute_pseudocost_average_up(); i_t branch_var = fractional[0]; f_t max_score = std::numeric_limits::lowest(); @@ -83,11 +82,12 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, constexpr f_t eps = f_t(1e-6); for (i_t j : fractional) { - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - auto [pc_up, pc_down] = pc.get_pseudocost(j, averages); - f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); - f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t pc_down = pc.get_pseudocost_down(j, 
avg_down); + f_t pc_up = pc.get_pseudocost_up(j, avg_up); + f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); + f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); f_t score = 0; branch_direction_t dir = branch_direction_t::DOWN; @@ -129,8 +129,8 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, return {branch_var, round_dir}; } -template -branch_variable_t guided_diving(pseudo_costs_t& pc, +template +branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& incumbent, @@ -139,7 +139,8 @@ branch_variable_t guided_diving(pseudo_costs_t& pc, const i_t num_fractional = fractional.size(); if (num_fractional == 0) return {-1, branch_direction_t::NONE}; - pseudo_cost_averages_t averages = pc.compute_averages(); + f_t avg_down = pc.compute_pseudocost_average_down(); + f_t avg_up = pc.compute_pseudocost_average_up(); i_t branch_var = fractional[0]; f_t max_score = std::numeric_limits::lowest(); @@ -154,10 +155,11 @@ branch_variable_t guided_diving(pseudo_costs_t& pc, branch_direction_t dir = down_dist < up_dist + eps ? branch_direction_t::DOWN : branch_direction_t::UP; - auto [pc_up, pc_down] = pc.get_pseudocost(j, averages); - f_t score1 = dir == branch_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; - f_t score2 = dir == branch_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; - f_t score = (score1 + score2) / 6; + f_t pc_down = pc.get_pseudocost_down(j, avg_down); + f_t pc_up = pc.get_pseudocost_up(j, avg_up); + f_t score1 = dir == branch_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; + f_t score2 = dir == branch_direction_t::DOWN ? 
pc_up * f_up : pc_down * f_down; + f_t score = (score1 + score2) / 6; if (score > max_score) { max_score = score; @@ -257,26 +259,12 @@ template branch_variable_t pseudocost_diving(pseudo_costs_t& p const std::vector& root_solution, logger_t& log); -template branch_variable_t pseudocost_diving( - pseudo_costs_t& pc, - const std::vector& fractional, - const std::vector& solution, - const std::vector& root_solution, - logger_t& log); - template branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& incumbent, logger_t& log); -template branch_variable_t guided_diving( - pseudo_costs_t& pc, - const std::vector& fractional, - const std::vector& solution, - const std::vector& incumbent, - logger_t& log); - template void calculate_variable_locks(const lp_problem_t& lp_problem, std::vector& up_locks, std::vector& down_locks); diff --git a/cpp/src/branch_and_bound/diving_heuristics.hpp b/cpp/src/branch_and_bound/diving_heuristics.hpp index 325aa0b878..dfeabe3a5f 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.hpp +++ b/cpp/src/branch_and_bound/diving_heuristics.hpp @@ -22,15 +22,15 @@ branch_variable_t line_search_diving(const std::vector& fractional, const std::vector& root_solution, logger_t& log); -template -branch_variable_t pseudocost_diving(pseudo_costs_t& pc, +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& root_solution, logger_t& log); -template -branch_variable_t guided_diving(pseudo_costs_t& pc, +template +branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& fractional, const std::vector& solution, const std::vector& incumbent, diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index 6f940e8f52..3337c279c2 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -24,7 
+24,6 @@ #include namespace cuopt::linear_programming::dual_simplex { - namespace { static bool is_dual_simplex_done(dual::status_t status) @@ -1239,93 +1238,243 @@ void strong_branching(const lp_problem_t& original_lp, fractional, strong_branch_down, strong_branch_up, root_solution.x); } -template -pseudo_cost_value_t pseudo_costs_t::get_pseudocost( - i_t j, pseudo_cost_averages_t averages) const +template +inline void pseudo_costs_t::update_pseudocost_down(i_t j, f_t delta) { - i_t num_up = pseudo_cost_num_up[j]; - f_t sum_up = pseudo_cost_sum_up[j]; - i_t num_down = pseudo_cost_num_down[j]; - f_t sum_down = pseudo_cost_sum_down[j]; - f_t pc_up = num_up > 0 ? sum_up / num_up : averages.up_avg; - f_t pc_down = num_down > 0 ? sum_down / num_down : averages.down_avg; - return {pc_up, pc_down}; + if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { + pseudo_cost_sum_down[j] += delta; + pseudo_cost_num_down[j]++; + } else { +#pragma omp atomic + pseudo_cost_sum_down[j] += delta; + +#pragma omp atomic + pseudo_cost_num_down[j]++; + } } -template -f_t pseudo_costs_t::calculate_pseudocost_score( - i_t j, const std::vector& solution, pseudo_cost_averages_t averages) const +template +inline void pseudo_costs_t::update_pseudocost_up(i_t j, f_t delta) { - constexpr f_t eps = 1e-6; - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - auto [pc_up, pc_down] = get_pseudocost(j, averages); - return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { + pseudo_cost_sum_up[j] += delta; + pseudo_cost_num_up[j]++; + } else { +#pragma omp atomic + pseudo_cost_sum_up[j] += delta; + +#pragma omp atomic + pseudo_cost_num_up[j]++; + } } -template -void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, - f_t leaf_objective) +template +inline i_t pseudo_costs_t::get_pseudocost_num_down(i_t j) { - const f_t change_in_obj = std::max(leaf_objective - 
node_ptr->lower_bound, 0.0); - const f_t frac = node_ptr->branch_dir == branch_direction_t::DOWN - ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) - : std::ceil(node_ptr->fractional_val) - node_ptr->fractional_val; + if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { + return pseudo_cost_num_down[j]; + } else { + i_t num; +#pragma omp atomic read + num = pseudo_cost_num_down[j]; + return num; + } +} - if (node_ptr->branch_dir == branch_direction_t::DOWN) { - pseudo_cost_sum_down[node_ptr->branch_var] += change_in_obj / frac; - pseudo_cost_num_down[node_ptr->branch_var]++; +template +inline i_t pseudo_costs_t::get_pseudocost_num_up(i_t j) +{ + if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { + return pseudo_cost_num_up[j]; + } else { + i_t num; +#pragma omp atomic read + num = pseudo_cost_num_up[j]; + return num; + } +} + +template +inline f_t pseudo_costs_t::get_pseudocost_down(i_t j, f_t avg) const +{ + if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { + return pseudo_cost_num_down[j] > 0 ? pseudo_cost_sum_down[j] / pseudo_cost_num_down[j] : avg; + } else { + i_t num; + f_t sum; + +#pragma omp atomic read + num = pseudo_cost_num_down[j]; + + if (num > 0) { +#pragma omp atomic read + sum = pseudo_cost_sum_down[j]; + + return sum / num; + } else { + return avg; + } + } +} + +template +inline f_t pseudo_costs_t::get_pseudocost_up(i_t j, f_t avg) const +{ + if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { + return pseudo_cost_num_up[j] > 0 ? 
pseudo_cost_sum_up[j] / pseudo_cost_num_up[j] : avg; + } else { + i_t num; + f_t sum; + +#pragma omp atomic read + num = pseudo_cost_num_up[j]; + + if (num > 0) { +#pragma omp atomic read + sum = pseudo_cost_sum_up[j]; + + return sum / num; + } else { + return avg; + } + } +} + +template +inline f_t pseudo_costs_t::compute_pseudocost_average_down() +{ + i_t num_initialized = 0; + f_t avg = 0.0; + + if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { + for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { + if (pseudo_cost_num_down[j] > 0 && std::isfinite(pseudo_cost_sum_down[j])) { + ++num_initialized; + avg += pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; + } + } } else { - pseudo_cost_sum_up[node_ptr->branch_var] += change_in_obj / frac; - pseudo_cost_num_up[node_ptr->branch_var]++; + for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { + i_t n; + f_t sum; + +#pragma omp atomic read + n = pseudo_cost_num_down[j]; + +#pragma omp atomic read + sum = pseudo_cost_sum_down[j]; + + if (n > 0 && std::isfinite(sum)) { + ++num_initialized; + avg += sum / n; + } + } } + + return (num_initialized > 0) ? 
avg / num_initialized : 1.0; } -template -pseudo_cost_averages_t pseudo_costs_t::compute_averages() const +template +inline f_t pseudo_costs_t::compute_pseudocost_average_up() { - i_t num_initialized_down = 0; - i_t num_initialized_up = 0; - f_t pseudo_cost_down_avg = 0.0; - f_t pseudo_cost_up_avg = 0.0; - - for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { - if (pseudo_cost_num_down[j] > 0 && std::isfinite(pseudo_cost_sum_down[j])) { - ++num_initialized_down; - pseudo_cost_down_avg += pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; + i_t num_initialized = 0; + f_t avg = 0.0; + + if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { + for (size_t j = 0; j < pseudo_cost_sum_up.size(); ++j) { + if (pseudo_cost_num_up[j] > 0 && std::isfinite(pseudo_cost_sum_up[j])) { + ++num_initialized; + avg += pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; + } } + } else { + for (size_t j = 0; j < pseudo_cost_sum_up.size(); ++j) { + i_t n; + f_t sum; + +#pragma omp atomic read + n = pseudo_cost_num_up[j]; - if (pseudo_cost_num_up[j] > 0 && std::isfinite(pseudo_cost_sum_up[j])) { - ++num_initialized_up; - pseudo_cost_up_avg += pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; +#pragma omp atomic read + sum = pseudo_cost_sum_up[j]; + + if (n > 0 && std::isfinite(sum)) { + ++num_initialized; + avg += sum / n; + } } } - pseudo_cost_averages_t averages{ - .down_avg = (num_initialized_down > 0) ? pseudo_cost_down_avg / num_initialized_down : 1.0, - .num_init_down = num_initialized_down, - .up_avg = (num_initialized_up > 0) ? pseudo_cost_up_avg / num_initialized_up : 1.0, - .num_init_up = num_initialized_up}; - return averages; + return (num_initialized > 0) ? 
avg / num_initialized : 1.0; +} + +template +inline void pseudo_costs_t::lock_variable_up(i_t j) +{ + if (settings.deterministic == CUOPT_MODE_OPPORTUNISTIC) { pseudo_cost_mutex_up[j].lock(); } } -template -i_t pseudo_costs_t::variable_selection(const std::vector& fractional, - const std::vector& solution) +template +inline void pseudo_costs_t::lock_variable_down(i_t j) { - constexpr f_t eps = 1e-6; - i_t branch_var = fractional[0]; - f_t max_score = -1; - pseudo_cost_averages_t averages = compute_averages(); + if (settings.deterministic == CUOPT_MODE_OPPORTUNISTIC) { pseudo_cost_mutex_down[j].lock(); } +} - settings.log.debug("PC: num initialized down %d up %d avg down %e up %e\n", - averages.num_init_down, - averages.num_init_up, - averages.down_avg, - averages.up_avg); +template +inline void pseudo_costs_t::unlock_variable_up(i_t j) +{ + if (settings.deterministic == CUOPT_MODE_OPPORTUNISTIC) { pseudo_cost_mutex_up[j].unlock(); } +} + +template +inline void pseudo_costs_t::unlock_variable_down(i_t j) +{ + if (settings.deterministic == CUOPT_MODE_OPPORTUNISTIC) { pseudo_cost_mutex_down[j].unlock(); } +} + +template +f_t pseudo_costs_t::calculate_pseudocost_score(i_t j, + const std::vector& solution, + f_t avg_down, + f_t avg_up) const +{ + constexpr f_t eps = 1e-6; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t pc_down = get_pseudocost_down(j, avg_down); + f_t pc_up = get_pseudocost_up(j, avg_up); + return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); +} + +template +void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, + f_t leaf_objective) +{ + const f_t change_in_obj = std::max(leaf_objective - node_ptr->lower_bound, 0.0); + const f_t frac = node_ptr->branch_dir == branch_direction_t::DOWN + ? 
node_ptr->fractional_val - std::floor(node_ptr->fractional_val) + : std::ceil(node_ptr->fractional_val) - node_ptr->fractional_val; + + if (node_ptr->branch_dir == branch_direction_t::DOWN) { + update_pseudocost_down(node_ptr->branch_var, change_in_obj / frac); + } else { + update_pseudocost_up(node_ptr->branch_var, change_in_obj / frac); + } +} + +template +i_t pseudo_costs_t::variable_selection(const std::vector& fractional, + const std::vector& solution) +{ + i_t branch_var = fractional[0]; + f_t max_score = -1; + f_t avg_down = compute_pseudocost_average_down(); + f_t avg_up = compute_pseudocost_average_up(); + + settings.log.debug("PC: avg down %e up %e\n", avg_down, avg_up); for (i_t j : fractional) { - f_t score = calculate_pseudocost_score(j, solution, averages); + f_t score = calculate_pseudocost_score(j, solution, avg_down, avg_up); if (score > max_score) { max_score = score; @@ -1341,8 +1490,8 @@ i_t pseudo_costs_t::variable_selection(const std::vector return branch_var; } -template -i_t pseudo_costs_t::reliable_variable_selection( +template +i_t pseudo_costs_t::reliable_variable_selection( const mip_node_t* node_ptr, const std::vector& fractional, branch_and_bound_worker_t* worker, @@ -1357,7 +1506,8 @@ i_t pseudo_costs_t::reliable_variable_selection( f_t start_time = bnb_stats.start_time; i_t branch_var = fractional[0]; f_t max_score = -1; - pseudo_cost_averages_t averages; + f_t avg_down{0}; + f_t avg_up{0}; lp_solution_t& leaf_solution = worker->leaf_solution; const int64_t branch_and_bound_lp_iters = bnb_stats.total_lp_iters; @@ -1390,24 +1540,21 @@ i_t pseudo_costs_t::reliable_variable_selection( // In the latter, we are not using the average pseudocost (which calculated in the `initialized` // method). 
if (reliable_threshold == 0) { - averages = compute_averages(); - settings.log.debug("PC: num initialized down %d up %d avg down %e up %e\n", - averages.num_init_down, - averages.num_init_up, - averages.down_avg, - averages.up_avg); + avg_down = compute_pseudocost_average_down(); + avg_up = compute_pseudocost_average_up(); + settings.log.debug("PC: avg down %e up %e\n", avg_down, avg_up); } std::vector> unreliable_list; omp_mutex_t score_mutex; for (i_t j : fractional) { - if (pseudo_cost_num_down[j] < reliable_threshold || - pseudo_cost_num_up[j] < reliable_threshold) { + if (get_pseudocost_num_down(j) < reliable_threshold || + get_pseudocost_num_up(j) < reliable_threshold) { unreliable_list.push_back(std::make_pair(-1, j)); continue; } - f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); + f_t score = calculate_pseudocost_score(j, leaf_solution.x, avg_down, avg_up); if (score > max_score) { max_score = score; @@ -1517,7 +1664,7 @@ i_t pseudo_costs_t::reliable_variable_selection( } for (auto& [score, j] : unreliable_list) { - if (pseudo_cost_num_down[j] == 0 || pseudo_cost_num_up[j] == 0) { + if (get_pseudocost_num_down(j) == 0 || get_pseudocost_num_up(j) == 0) { // Estimate the objective change by performing a single pivot of dual simplex. 
objective_change_estimate_t estimate = single_pivot_objective_change_estimate(worker->leaf_problem, @@ -1538,7 +1685,7 @@ i_t pseudo_costs_t::reliable_variable_selection( score = std::max(estimate.up_obj_change, eps) * std::max(estimate.down_obj_change, eps); } else { // Use the previous score, even if it is unreliable - score = calculate_pseudocost_score(j, leaf_solution.x, averages); + score = calculate_pseudocost_score(j, leaf_solution.x, avg_down, avg_up); } } } else { @@ -1627,8 +1774,8 @@ i_t pseudo_costs_t::reliable_variable_selection( settings.log.debug( "DS skipping variable %d branch down (shared_idx %d): already solved by PDLP\n", j, i); } else { - pseudo_cost_mutex_down[j].lock(); - if (pseudo_cost_num_down[j] < reliable_threshold) { + lock_variable_down(j); + if (get_pseudocost_num_down(j) < reliable_threshold) { // Do trial branching on the down branch i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, @@ -1653,16 +1800,14 @@ i_t pseudo_costs_t::reliable_variable_selection( if (!std::isnan(obj)) { f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); f_t change_in_x = leaf_solution.x[j] - std::floor(leaf_solution.x[j]); - pseudo_cost_sum_down[j] += change_in_obj / change_in_x; - pseudo_cost_num_down[j]++; - // Should be valid if were are already here + update_pseudocost_down(j, change_in_obj / change_in_x); if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(i); } } } else { // Variable became reliable, make it as solved so that batch PDLP does not solve it again if (rb_mode == 1) sb_view.mark_solved(i); } - pseudo_cost_mutex_down[j].unlock(); + unlock_variable_down(j); } if (toc(start_time) > settings.time_limit) { continue; } @@ -1674,8 +1819,8 @@ i_t pseudo_costs_t::reliable_variable_selection( j, shared_idx); } else { - pseudo_cost_mutex_up[j].lock(); - if (pseudo_cost_num_up[j] < reliable_threshold) { + lock_variable_up(j); + if (get_pseudocost_num_up(j) < reliable_threshold) { i_t iter = 
0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, @@ -1699,21 +1844,19 @@ i_t pseudo_costs_t::reliable_variable_selection( if (!std::isnan(obj)) { f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); f_t change_in_x = std::ceil(leaf_solution.x[j]) - leaf_solution.x[j]; - pseudo_cost_sum_up[j] += change_in_obj / change_in_x; - pseudo_cost_num_up[j]++; - // Should be valid if were are already here + update_pseudocost_up(j, change_in_obj / change_in_x); if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(shared_idx); } } } else { // Variable became reliable, make it as solved so that batch PDLP does not solve it again if (rb_mode == 1) sb_view.mark_solved(shared_idx); } - pseudo_cost_mutex_up[j].unlock(); + unlock_variable_up(j); } if (toc(start_time) > settings.time_limit) { continue; } - score = calculate_pseudocost_score(j, leaf_solution.x, averages); + score = calculate_pseudocost_score(j, leaf_solution.x, avg_down, avg_up); score_mutex.lock(); if (score > max_score) { max_score = score; @@ -1743,15 +1886,14 @@ i_t pseudo_costs_t::reliable_variable_selection( // PDLP won the merge, update the pseudo-cost only if node is still unreliable (concurrent // calls may have made it reliable) if (source == sb_source_t::PDLP) { - pseudo_cost_mutex_down[j].lock(); - if (pseudo_cost_num_down[j] < reliable_threshold) { + lock_variable_down(j); + if (get_pseudocost_num_down(j) < reliable_threshold) { f_t change_in_obj = std::max(merged_obj - node_ptr->lower_bound, eps); f_t change_in_x = leaf_solution.x[j] - std::floor(leaf_solution.x[j]); - pseudo_cost_sum_down[j] += change_in_obj / change_in_x; - pseudo_cost_num_down[j]++; + update_pseudocost_down(j, change_in_obj / change_in_x); pdlp_applied++; } - pseudo_cost_mutex_down[j].unlock(); + unlock_variable_down(j); } } @@ -1763,19 +1905,18 @@ i_t pseudo_costs_t::reliable_variable_selection( // PDLP won the merge, update the pseudo-cost only if node is still unreliable 
(concurrent // calls may have made it reliable) if (source == sb_source_t::PDLP) { - pseudo_cost_mutex_up[j].lock(); - if (pseudo_cost_num_up[j] < reliable_threshold) { + lock_variable_up(j); + if (get_pseudocost_num_up(j) < reliable_threshold) { f_t change_in_obj = std::max(merged_obj - node_ptr->lower_bound, eps); f_t change_in_x = std::ceil(leaf_solution.x[j]) - leaf_solution.x[j]; - pseudo_cost_sum_up[j] += change_in_obj / change_in_x; - pseudo_cost_num_up[j]++; + update_pseudocost_up(j, change_in_obj / change_in_x); pdlp_applied++; } - pseudo_cost_mutex_up[j].unlock(); + unlock_variable_up(j); } } - f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages); + f_t score = calculate_pseudocost_score(j, leaf_solution.x, avg_down, avg_up); if (score > max_score) { max_score = score; branch_var = j; @@ -1795,21 +1936,20 @@ i_t pseudo_costs_t::reliable_variable_selection( return branch_var; } -template -f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, - const std::vector& solution, - f_t lower_bound) +template +f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound) { - const i_t num_fractional = fractional.size(); - f_t estimate = lower_bound; - - pseudo_cost_averages_t averages = compute_averages(); + f_t estimate = lower_bound; + f_t avg_down = compute_pseudocost_average_down(); + f_t avg_up = compute_pseudocost_average_up(); for (i_t j : fractional) { - constexpr f_t eps = 1e-6; - auto [pc_up, pc_down] = get_pseudocost(j, averages); - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t pc_down = get_pseudocost_down(j, avg_down); + f_t pc_up = get_pseudocost_up(j, avg_up); + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; estimate += std::min(pc_down * f_down, pc_up * f_up); } @@ -1817,8 +1957,8 @@ f_t pseudo_costs_t::obj_estimate(const std::vector& frac return estimate; 
} -template -void pseudo_costs_t::update_pseudo_costs_from_strong_branching( +template +void pseudo_costs_t::update_pseudo_costs_from_strong_branching( const std::vector& fractional, const std::vector& strong_branch_down, const std::vector& strong_branch_up, @@ -1846,8 +1986,7 @@ void pseudo_costs_t::update_pseudo_costs_from_strong_branchin #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE -template class pseudo_costs_t; -template class pseudo_costs_t; +template class pseudo_costs_t; template class pseudo_cost_snapshot_t; template void strong_branching(const lp_problem_t& original_lp, diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 4a3a72fb39..236ee83730 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -19,7 +19,6 @@ #include #include -#include #include #include @@ -51,10 +50,10 @@ struct reliability_branching_settings_t { f_t bnb_lp_factor = 0.5; i_t bnb_lp_offset = 100000; - // Maximum and minimum points in curve to determine the value + // Maximum and minimum points in curve to determine the value // of the `reliable_threshold` based on the current number of LP // iterations in strong branching and B&B. Since it is a - // a curve, the actual value of `reliable_threshold` may be + // a curve, the actual value of `reliable_threshold` may be
// Only used when `reliable_threshold` is negative i_t max_reliable_threshold = 5; @@ -84,20 +83,6 @@ struct batch_pdlp_warm_cache_t { bool populated{false}; }; -template -struct pseudo_cost_averages_t { - f_t down_avg; - i_t num_init_down; - f_t up_avg; - i_t num_init_up; -}; - -template -struct pseudo_cost_value_t { - f_t pc_up; - f_t pc_down; -}; - template struct pseudo_cost_update_t { i_t variable; @@ -115,30 +100,9 @@ struct pseudo_cost_update_t { } }; -// `BnBMode` specify how we control the memory accesses: -// - If `BnBMode == branch_and_bound_mode_t::PARALLEL`, then we assume that this object is shared -// among the B&B threads, and thus, require atomics and mutexes to avoid data races. -// - If `BnBMode == branch_and_bound_mode_t::DETERMINISTIC`, then each thread has it own pseudocost -// snapshot, hence, we can disable all atomics and mutexes. -// `BnBMode` is automatically set depending if it is a `pseudo_costs_t` (PARALLEL) -// or a `pseudo_costs_snapshot_t` (DETERMINISTIC). -template +template class pseudo_costs_t { public: - // Define the types used for storing the pseudocost of each variable. - // Disable or enable atomics depending on if we are in REGULAR or DETERMINISTIC modes - using float_type = omp_atomic_t; - - using int_type = omp_atomic_t; - - // Counting the number of LP iterations might require more than an int32 can hold. 
- using int64_type = omp_atomic_t; - - // Disable or enable mutexes depending on if we are in REGULAR or DETERMINISTIC modes - using mutex_type = omp_mutex_t; - explicit pseudo_costs_t(i_t num_variables, const simplex_solver_settings_t& settings) : settings(settings), pseudo_cost_sum_down(num_variables), @@ -152,6 +116,24 @@ class pseudo_costs_t { { } + pseudo_costs_t(const pseudo_costs_t& other) : pseudo_costs_t(1, other.settings) + { + *this = other; + } + + pseudo_costs_t& operator=(const pseudo_costs_t& other) + { + if (this != &other) { + this->AT = other.AT; + this->pdlp_warm_cache = other.pdlp_warm_cache; + this->pseudo_cost_num_down = other.pseudo_cost_num_down; + this->pseudo_cost_num_up = other.pseudo_cost_num_up; + this->pseudo_cost_sum_down = other.pseudo_cost_sum_down; + this->pseudo_cost_sum_up = other.pseudo_cost_sum_up; + } + return *this; + } + void update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective); void merge_updates(const std::vector>& updates) @@ -177,7 +159,11 @@ class pseudo_costs_t { pseudo_cost_mutex_down.resize(num_variables); } - pseudo_cost_averages_t compute_averages() const; + f_t get_pseudocost_down(i_t j, f_t avg) const; + f_t get_pseudocost_up(i_t j, f_t avg) const; + + f_t compute_pseudocost_average_down(); + f_t compute_pseudocost_average_up(); f_t obj_estimate(const std::vector& fractional, const std::vector& solution, @@ -206,83 +192,56 @@ class pseudo_costs_t { detail::compute_hash(pseudo_cost_num_down) ^ detail::compute_hash(pseudo_cost_num_up); } - pseudo_cost_value_t get_pseudocost(i_t variable, - pseudo_cost_averages_t averages) const; - f_t calculate_pseudocost_score(i_t j, const std::vector& solution, - pseudo_cost_averages_t averages) const; + f_t avg_down, + f_t avg_up) const; + + std::shared_ptr> AT; // Transpose of the constraint matrix A + std::shared_ptr> pdlp_warm_cache; reliability_branching_settings_t reliability_branching_settings; simplex_solver_settings_t settings; - std::shared_ptr> AT; // 
Transpose of the constraint matrix A - std::vector pseudo_cost_sum_up; - std::vector pseudo_cost_sum_down; - std::vector pseudo_cost_num_up; - std::vector pseudo_cost_num_down; - std::vector pseudo_cost_mutex_up; - std::vector pseudo_cost_mutex_down; - int64_type strong_branching_lp_iter = 0; + protected: + // Do not use this attributes directly. Instead rely on the get/update/set methods + // as they conditionally use atomics when needed + std::vector pseudo_cost_sum_up; + std::vector pseudo_cost_sum_down; + std::vector pseudo_cost_num_up; + std::vector pseudo_cost_num_down; + std::vector pseudo_cost_mutex_up; + std::vector pseudo_cost_mutex_down; - std::shared_ptr> pdlp_warm_cache; -}; + omp_atomic_t strong_branching_lp_iter = 0; -template -class pseudo_cost_snapshot_t : public pseudo_costs_t { - public: - using Base = pseudo_costs_t; + void update_pseudocost_down(i_t j, f_t delta); + void update_pseudocost_up(i_t j, f_t delta); - pseudo_cost_snapshot_t(i_t num_variables, const simplex_solver_settings_t& settings) - : Base(num_variables, settings) {}; + i_t get_pseudocost_num_down(i_t j); + i_t get_pseudocost_num_up(i_t j); - pseudo_cost_snapshot_t(const pseudo_costs_t& other) - : Base(1, other.settings) - { - *this = other; - } + void lock_variable_up(i_t j); + void lock_variable_down(i_t j); + void unlock_variable_up(i_t j); + void unlock_variable_down(i_t j); +}; - pseudo_cost_snapshot_t(const pseudo_cost_snapshot_t& other) : Base(1, other.settings) - { - *this = other; - } +template +class pseudo_cost_snapshot_t : public pseudo_costs_t { + public: + using Base = pseudo_costs_t; + using Base::Base; - pseudo_cost_snapshot_t& operator=( - const pseudo_costs_t& other) + pseudo_cost_snapshot_t(const pseudo_costs_t& other) : Base(1, other.settings) { - this->AT = other.AT; - this->pdlp_warm_cache = other.pdlp_warm_cache; - - i_t n = other.pseudo_cost_num_down.size(); - this->pseudo_cost_num_down.resize(n); - this->pseudo_cost_num_up.resize(n); - 
this->pseudo_cost_sum_down.resize(n); - this->pseudo_cost_sum_up.resize(n); - - for (i_t i = 0; i < n; ++i) { - this->pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].underlying(); - this->pseudo_cost_num_up[i] = other.pseudo_cost_num_up[i].underlying(); - this->pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].underlying(); - this->pseudo_cost_sum_up[i] = other.pseudo_cost_sum_up[i].underlying(); - } - - return *this; + Base::operator=(other); } - pseudo_cost_snapshot_t& operator=(const pseudo_cost_snapshot_t& other) + pseudo_cost_snapshot_t operator=(const pseudo_costs_t& other) { - if (this != &other) { - this->AT = other.AT; - this->pdlp_warm_cache = other.pdlp_warm_cache; - this->pseudo_cost_num_down = other.pseudo_cost_num_down; - this->pseudo_cost_num_up = other.pseudo_cost_num_up; - this->pseudo_cost_sum_down = other.pseudo_cost_sum_down; - this->pseudo_cost_sum_up = other.pseudo_cost_sum_up; - } - return *this; - }; + return Base::operator=(other); + } void queue_update( i_t variable, branch_direction_t direction, f_t delta, double clock, int worker_id) From 34b6a4028fa0294724ada0b75cb2b9f19b64409f Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Sun, 3 May 2026 10:24:25 +0200 Subject: [PATCH 52/53] simplified code to only contain the path with atomics Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/pseudo_costs.cpp | 230 ++++------------------ cpp/src/branch_and_bound/pseudo_costs.hpp | 36 ++-- 2 files changed, 56 insertions(+), 210 deletions(-) diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index 3337c279c2..2c4db4df7c 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -1238,136 +1238,18 @@ void strong_branching(const lp_problem_t& original_lp, fractional, strong_branch_down, strong_branch_up, root_solution.x); } -template -inline void pseudo_costs_t::update_pseudocost_down(i_t j, f_t delta) -{ - if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { - pseudo_cost_sum_down[j] += delta; - pseudo_cost_num_down[j]++; - } else { -#pragma omp atomic - pseudo_cost_sum_down[j] += delta; - -#pragma omp atomic - pseudo_cost_num_down[j]++; - } -} - -template -inline void pseudo_costs_t::update_pseudocost_up(i_t j, f_t delta) -{ - if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { - pseudo_cost_sum_up[j] += delta; - pseudo_cost_num_up[j]++; - } else { -#pragma omp atomic - pseudo_cost_sum_up[j] += delta; - -#pragma omp atomic - pseudo_cost_num_up[j]++; - } -} - -template -inline i_t pseudo_costs_t::get_pseudocost_num_down(i_t j) -{ - if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { - return pseudo_cost_num_down[j]; - } else { - i_t num; -#pragma omp atomic read - num = pseudo_cost_num_down[j]; - return num; - } -} - -template -inline i_t pseudo_costs_t::get_pseudocost_num_up(i_t j) -{ - if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { - return pseudo_cost_num_up[j]; - } else { - i_t num; -#pragma omp atomic read - num = pseudo_cost_num_up[j]; - return num; - } -} - -template -inline f_t pseudo_costs_t::get_pseudocost_down(i_t j, f_t avg) const -{ - if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { - return pseudo_cost_num_down[j] > 0 ? 
pseudo_cost_sum_down[j] / pseudo_cost_num_down[j] : avg; - } else { - i_t num; - f_t sum; - -#pragma omp atomic read - num = pseudo_cost_num_down[j]; - - if (num > 0) { -#pragma omp atomic read - sum = pseudo_cost_sum_down[j]; - - return sum / num; - } else { - return avg; - } - } -} - -template -inline f_t pseudo_costs_t::get_pseudocost_up(i_t j, f_t avg) const -{ - if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { - return pseudo_cost_num_up[j] > 0 ? pseudo_cost_sum_up[j] / pseudo_cost_num_up[j] : avg; - } else { - i_t num; - f_t sum; - -#pragma omp atomic read - num = pseudo_cost_num_up[j]; - - if (num > 0) { -#pragma omp atomic read - sum = pseudo_cost_sum_up[j]; - - return sum / num; - } else { - return avg; - } - } -} - template inline f_t pseudo_costs_t::compute_pseudocost_average_down() { i_t num_initialized = 0; f_t avg = 0.0; - if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { - for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { - if (pseudo_cost_num_down[j] > 0 && std::isfinite(pseudo_cost_sum_down[j])) { - ++num_initialized; - avg += pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; - } - } - } else { - for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { - i_t n; - f_t sum; - -#pragma omp atomic read - n = pseudo_cost_num_down[j]; - -#pragma omp atomic read - sum = pseudo_cost_sum_down[j]; - - if (n > 0 && std::isfinite(sum)) { - ++num_initialized; - avg += sum / n; - } + for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { + i_t num = pseudo_cost_num_down[j]; + f_t sum = pseudo_cost_sum_down[j]; + if (num > 0 && std::isfinite(sum)) { + ++num_initialized; + avg += sum / num; } } @@ -1380,58 +1262,18 @@ inline f_t pseudo_costs_t::compute_pseudocost_average_up() i_t num_initialized = 0; f_t avg = 0.0; - if (settings.deterministic == CUOPT_MODE_DETERMINISTIC) { - for (size_t j = 0; j < pseudo_cost_sum_up.size(); ++j) { - if (pseudo_cost_num_up[j] > 0 && std::isfinite(pseudo_cost_sum_up[j])) { - ++num_initialized; - avg 
+= pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; - } - } - } else { - for (size_t j = 0; j < pseudo_cost_sum_up.size(); ++j) { - i_t n; - f_t sum; - -#pragma omp atomic read - n = pseudo_cost_num_up[j]; - -#pragma omp atomic read - sum = pseudo_cost_sum_up[j]; - - if (n > 0 && std::isfinite(sum)) { - ++num_initialized; - avg += sum / n; - } + for (size_t j = 0; j < pseudo_cost_sum_up.size(); ++j) { + i_t num = pseudo_cost_num_up[j]; + f_t sum = pseudo_cost_sum_up[j]; + if (num > 0 && std::isfinite(sum)) { + ++num_initialized; + avg += sum / num; } } return (num_initialized > 0) ? avg / num_initialized : 1.0; } -template -inline void pseudo_costs_t::lock_variable_up(i_t j) -{ - if (settings.deterministic == CUOPT_MODE_OPPORTUNISTIC) { pseudo_cost_mutex_up[j].lock(); } -} - -template -inline void pseudo_costs_t::lock_variable_down(i_t j) -{ - if (settings.deterministic == CUOPT_MODE_OPPORTUNISTIC) { pseudo_cost_mutex_down[j].lock(); } -} - -template -inline void pseudo_costs_t::unlock_variable_up(i_t j) -{ - if (settings.deterministic == CUOPT_MODE_OPPORTUNISTIC) { pseudo_cost_mutex_up[j].unlock(); } -} - -template -inline void pseudo_costs_t::unlock_variable_down(i_t j) -{ - if (settings.deterministic == CUOPT_MODE_OPPORTUNISTIC) { pseudo_cost_mutex_down[j].unlock(); } -} - template f_t pseudo_costs_t::calculate_pseudocost_score(i_t j, const std::vector& solution, @@ -1456,9 +1298,11 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt : std::ceil(node_ptr->fractional_val) - node_ptr->fractional_val; if (node_ptr->branch_dir == branch_direction_t::DOWN) { - update_pseudocost_down(node_ptr->branch_var, change_in_obj / frac); + pseudo_cost_sum_down[node_ptr->branch_var] += change_in_obj / frac; + pseudo_cost_num_down[node_ptr->branch_var]++; } else { - update_pseudocost_up(node_ptr->branch_var, change_in_obj / frac); + pseudo_cost_sum_up[node_ptr->branch_var] += change_in_obj / frac; + pseudo_cost_num_up[node_ptr->branch_var]++; } } @@ -1549,8 +1393,8 @@ 
i_t pseudo_costs_t::reliable_variable_selection( omp_mutex_t score_mutex; for (i_t j : fractional) { - if (get_pseudocost_num_down(j) < reliable_threshold || - get_pseudocost_num_up(j) < reliable_threshold) { + if (pseudo_cost_num_down[j] < reliable_threshold || + pseudo_cost_num_up[j] < reliable_threshold) { unreliable_list.push_back(std::make_pair(-1, j)); continue; } @@ -1664,7 +1508,7 @@ i_t pseudo_costs_t::reliable_variable_selection( } for (auto& [score, j] : unreliable_list) { - if (get_pseudocost_num_down(j) == 0 || get_pseudocost_num_up(j) == 0) { + if (pseudo_cost_num_down[j] == 0 || pseudo_cost_num_up[j] == 0) { // Estimate the objective change by performing a single pivot of dual simplex. objective_change_estimate_t estimate = single_pivot_objective_change_estimate(worker->leaf_problem, @@ -1774,8 +1618,8 @@ i_t pseudo_costs_t::reliable_variable_selection( settings.log.debug( "DS skipping variable %d branch down (shared_idx %d): already solved by PDLP\n", j, i); } else { - lock_variable_down(j); - if (get_pseudocost_num_down(j) < reliable_threshold) { + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { // Do trial branching on the down branch i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, @@ -1800,14 +1644,15 @@ i_t pseudo_costs_t::reliable_variable_selection( if (!std::isnan(obj)) { f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); f_t change_in_x = leaf_solution.x[j] - std::floor(leaf_solution.x[j]); - update_pseudocost_down(j, change_in_obj / change_in_x); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(i); } } } else { // Variable became reliable, make it as solved so that batch PDLP does not solve it again if (rb_mode == 1) sb_view.mark_solved(i); } - unlock_variable_down(j); + pseudo_cost_mutex_down[j].unlock(); } if (toc(start_time) > settings.time_limit) 
{ continue; } @@ -1819,8 +1664,8 @@ i_t pseudo_costs_t::reliable_variable_selection( j, shared_idx); } else { - lock_variable_up(j); - if (get_pseudocost_num_up(j) < reliable_threshold) { + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, @@ -1844,14 +1689,15 @@ i_t pseudo_costs_t::reliable_variable_selection( if (!std::isnan(obj)) { f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); f_t change_in_x = std::ceil(leaf_solution.x[j]) - leaf_solution.x[j]; - update_pseudocost_up(j, change_in_obj / change_in_x); + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(shared_idx); } } } else { // Variable became reliable, make it as solved so that batch PDLP does not solve it again if (rb_mode == 1) sb_view.mark_solved(shared_idx); } - unlock_variable_up(j); + pseudo_cost_mutex_up[j].unlock(); } if (toc(start_time) > settings.time_limit) { continue; } @@ -1886,14 +1732,15 @@ i_t pseudo_costs_t::reliable_variable_selection( // PDLP won the merge, update the pseudo-cost only if node is still unreliable (concurrent // calls may have made it reliable) if (source == sb_source_t::PDLP) { - lock_variable_down(j); - if (get_pseudocost_num_down(j) < reliable_threshold) { + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { f_t change_in_obj = std::max(merged_obj - node_ptr->lower_bound, eps); f_t change_in_x = leaf_solution.x[j] - std::floor(leaf_solution.x[j]); - update_pseudocost_down(j, change_in_obj / change_in_x); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; pdlp_applied++; } - unlock_variable_down(j); + pseudo_cost_mutex_down[j].unlock(); } } @@ -1905,14 +1752,15 @@ i_t pseudo_costs_t::reliable_variable_selection( // PDLP won the merge, update the pseudo-cost only 
if node is still unreliable (concurrent // calls may have made it reliable) if (source == sb_source_t::PDLP) { - lock_variable_up(j); - if (get_pseudocost_num_up(j) < reliable_threshold) { + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { f_t change_in_obj = std::max(merged_obj - node_ptr->lower_bound, eps); f_t change_in_x = std::ceil(leaf_solution.x[j]) - leaf_solution.x[j]; - update_pseudocost_up(j, change_in_obj / change_in_x); + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; pdlp_applied++; } - unlock_variable_up(j); + pseudo_cost_mutex_up[j].unlock(); } } diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 236ee83730..a44a2d2f03 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -159,8 +159,19 @@ class pseudo_costs_t { pseudo_cost_mutex_down.resize(num_variables); } - f_t get_pseudocost_down(i_t j, f_t avg) const; - f_t get_pseudocost_up(i_t j, f_t avg) const; + f_t get_pseudocost_down(i_t j, f_t avg) const + { + i_t num = pseudo_cost_num_down[j]; + f_t sum = pseudo_cost_sum_down[j]; + return num > 0 ? sum / num : avg; + } + + f_t get_pseudocost_up(i_t j, f_t avg) const + { + i_t num = pseudo_cost_num_up[j]; + f_t sum = pseudo_cost_sum_up[j]; + return num > 0 ? sum / num : avg; + } f_t compute_pseudocost_average_down(); f_t compute_pseudocost_average_up(); @@ -204,27 +215,14 @@ class pseudo_costs_t { simplex_solver_settings_t settings; protected: - // Do not use this attributes directly. 
Instead rely on the get/update/set methods - // as they conditionally use atomics when needed - std::vector pseudo_cost_sum_up; - std::vector pseudo_cost_sum_down; - std::vector pseudo_cost_num_up; - std::vector pseudo_cost_num_down; + std::vector> pseudo_cost_sum_up; + std::vector> pseudo_cost_sum_down; + std::vector> pseudo_cost_num_up; + std::vector> pseudo_cost_num_down; std::vector pseudo_cost_mutex_up; std::vector pseudo_cost_mutex_down; omp_atomic_t strong_branching_lp_iter = 0; - - void update_pseudocost_down(i_t j, f_t delta); - void update_pseudocost_up(i_t j, f_t delta); - - i_t get_pseudocost_num_down(i_t j); - i_t get_pseudocost_num_up(i_t j); - - void lock_variable_up(i_t j); - void lock_variable_down(i_t j); - void unlock_variable_up(i_t j); - void unlock_variable_down(i_t j); }; template From bc3d50b5d407c12bb39d3e4420f7a3b5c0a7a292 Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Tue, 5 May 2026 12:15:33 +0200 Subject: [PATCH 53/53] fixed compilation Signed-off-by: Nicolas L. 
Guidotti --- cpp/src/branch_and_bound/branch_and_bound.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index cd2d10ffd6..4418dd798f 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -844,7 +844,6 @@ branch_variable_t branch_and_bound_t::variable_selection( exploration_stats_, upper_bound_, bfs_worker_pool_.num_idle_workers(), - log, new_slacks_, original_lp_); } else { @@ -1032,9 +1031,7 @@ struct deterministic_bfs_policy_t const std::vector& fractional, const std::vector& x) override { - logger_t log; - log.log = false; - i_t var = this->worker.pc_snapshot.variable_selection(fractional, x, log); + i_t var = this->worker.pc_snapshot.variable_selection(fractional, x); auto dir = martin_criteria(x[var], this->bnb.root_relax_soln_.x[var]); return {var, dir}; } @@ -1046,7 +1043,7 @@ struct deterministic_bfs_policy_t logger_t log; log.log = false; node->objective_estimate = - this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound, log); + this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound); } void on_node_completed(mip_node_t* node, @@ -1191,9 +1188,6 @@ std::pair branch_and_bound_t::updat worker->recompute_basis = true; worker->recompute_bounds = true; - worker->recompute_basis = true; - worker->recompute_bounds = true; - if (lp_status == dual::status_t::DUAL_UNBOUNDED) { node_ptr->lower_bound = inf; policy.graphviz(search_tree, node_ptr, "infeasible", 0.0); @@ -2215,8 +2209,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (num_fractional != 0 && settings_.max_cut_passes > 0) { settings_.log.printf( - " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node | " - " " + " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node | " "Gap " "| Time |\n"); }