diff --git a/cpp/src/branch_and_bound/CMakeLists.txt b/cpp/src/branch_and_bound/CMakeLists.txt index 5bb1017120..1e40c1bbf1 100644 --- a/cpp/src/branch_and_bound/CMakeLists.txt +++ b/cpp/src/branch_and_bound/CMakeLists.txt @@ -5,7 +5,6 @@ set(BRANCH_AND_BOUND_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/mip_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pseudo_costs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/diving_heuristics.cpp ) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index e69ff7b9a5..4418dd798f 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -6,6 +6,7 @@ /* clang-format on */ #include +#include #include #include @@ -24,6 +25,7 @@ #include #include +#include #include #include @@ -32,18 +34,13 @@ #include #include #include -#include #include #include -#include -#include #include #include -#include #include namespace cuopt::linear_programming::dual_simplex { - namespace { template @@ -258,7 +255,7 @@ branch_and_bound_t::branch_and_bound_t( incumbent_(1), root_relax_soln_(1, 1), root_crossover_soln_(1, 1), - pc_(1), + pc_(1, solver_settings), solver_status_(mip_status_t::UNSET) { exploration_stats_.start_time = start_time; @@ -299,11 +296,8 @@ branch_and_bound_t::branch_and_bound_t( template f_t branch_and_bound_t::get_lower_bound() { - f_t lower_bound = lower_bound_ceiling_.load(); - f_t heap_lower_bound = node_queue_.get_lower_bound(); - f_t worker_lower_bound = worker_pool_.get_lower_bound(); - lower_bound = std::min(heap_lower_bound, lower_bound); - lower_bound = std::min(worker_lower_bound, lower_bound); + f_t lower_bound = lower_bound_ceiling_.load(); + lower_bound = std::min(bfs_worker_pool_.get_lower_bound(), lower_bound); if (std::isfinite(lower_bound)) { return lower_bound; @@ -810,7 +804,7 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, // Technische Universit¨at Berlin, Berlin, 
1999. Accessed: Aug. 08, 2025. // [Online]. Available: https://opus4.kobv.de/opus4-zib/frontdoor/index/index/docId/391 template -rounding_direction_t martin_criteria(f_t val, f_t root_val) +branch_direction_t martin_criteria(f_t val, f_t root_val) { const f_t down_val = std::floor(root_val); const f_t up_val = std::ceil(root_val); @@ -819,10 +813,10 @@ rounding_direction_t martin_criteria(f_t val, f_t root_val) constexpr f_t eps = 1e-6; if (down_dist < up_dist + eps) { - return rounding_direction_t::DOWN; + return branch_direction_t::DOWN; } else { - return rounding_direction_t::UP; + return branch_direction_t::UP; } } @@ -833,14 +827,14 @@ branch_variable_t branch_and_bound_t::variable_selection( branch_and_bound_worker_t* worker) { logger_t log; - log.log = false; - i_t branch_var = -1; - rounding_direction_t round_dir = rounding_direction_t::NONE; + log.log = false; + i_t branch_var = -1; + branch_direction_t round_dir = branch_direction_t::NONE; std::vector current_incumbent; std::vector& solution = worker->leaf_solution.x; switch (worker->search_strategy) { - case search_strategy_t::BEST_FIRST: + case BEST_FIRST: if (settings_.reliability_branching != 0) { branch_var = pc_.reliable_variable_selection(node_ptr, @@ -848,31 +842,30 @@ branch_variable_t branch_and_bound_t::variable_selection( worker, var_types_, exploration_stats_, - settings_, upper_bound_, - worker_pool_.num_idle_workers(), - log, + bfs_worker_pool_.num_idle_workers(), new_slacks_, original_lp_); } else { - branch_var = pc_.variable_selection(fractional, solution, log); + branch_var = pc_.variable_selection(fractional, solution); } round_dir = martin_criteria(solution[branch_var], root_relax_soln_.x[branch_var]); return {branch_var, round_dir}; - case search_strategy_t::COEFFICIENT_DIVING: + case COEFFICIENT_DIVING: return coefficient_diving( original_lp_, fractional, solution, var_up_locks_, var_down_locks_, log); - case search_strategy_t::LINE_SEARCH_DIVING: + case LINE_SEARCH_DIVING: return 
line_search_diving(fractional, solution, root_relax_soln_.x, log); - case search_strategy_t::PSEUDOCOST_DIVING: + case PSEUDOCOST_DIVING: return pseudocost_diving(pc_, fractional, solution, root_relax_soln_.x, log); - case search_strategy_t::GUIDED_DIVING: + case GUIDED_DIVING: + assert(incumbent_.has_incumbent); mutex_upper_.lock(); current_incumbent = incumbent_.x; mutex_upper_.unlock(); @@ -880,7 +873,7 @@ branch_variable_t branch_and_bound_t::variable_selection( default: log.debug("Unknown variable selection method: %d\n", worker->search_strategy); - return {-1, rounding_direction_t::NONE}; + return {-1, branch_direction_t::NONE}; } } @@ -907,7 +900,7 @@ struct tree_update_policy_t { const std::vector& x) = 0; virtual void on_node_completed(mip_node_t* node, node_status_t status, - rounding_direction_t dir) = 0; + branch_direction_t dir) = 0; virtual void on_numerical_issue(mip_node_t*) = 0; virtual void graphviz(search_tree_t&, mip_node_t*, const char*, f_t) = 0; virtual void on_optimal_callback(const std::vector&, f_t) = 0; @@ -952,9 +945,7 @@ struct nondeterministic_policy_t : tree_update_policy_t { const std::vector& x) override { if (worker->search_strategy == search_strategy_t::BEST_FIRST) { - logger_t pc_log; - pc_log.log = false; - node->objective_estimate = bnb.pc_.obj_estimate(fractional, x, node->lower_bound, pc_log); + node->objective_estimate = bnb.pc_.obj_estimate(fractional, x, node->lower_bound); } } @@ -986,7 +977,7 @@ struct nondeterministic_policy_t : tree_update_policy_t { } } - void on_node_completed(mip_node_t*, node_status_t, rounding_direction_t) override {} + void on_node_completed(mip_node_t*, node_status_t, branch_direction_t) override {} }; template @@ -1005,7 +996,7 @@ struct deterministic_policy_base_t : tree_update_policy_t { { if (node->branch_var < 0) return; f_t change = std::max(leaf_obj - node->lower_bound, f_t(0)); - f_t frac = node->branch_dir == rounding_direction_t::DOWN + f_t frac = node->branch_dir == 
branch_direction_t::DOWN ? node->fractional_val - std::floor(node->fractional_val) : std::ceil(node->fractional_val) - node->fractional_val; if (frac > 1e-10) { @@ -1049,13 +1040,15 @@ struct deterministic_bfs_policy_t const std::vector& fractional, const std::vector& x) override { + logger_t log; + log.log = false; node->objective_estimate = this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound); } void on_node_completed(mip_node_t* node, node_status_t status, - rounding_direction_t dir) override + branch_direction_t dir) override { switch (status) { case node_status_t::INFEASIBLE: this->worker.record_infeasible(node); break; @@ -1090,12 +1083,12 @@ struct deterministic_diving_policy_t : deterministic_policy_base_t> { using base = deterministic_policy_base_t>; - std::deque*>& stack; + circular_deque_t*>& stack; i_t max_backtrack_depth; deterministic_diving_policy_t(branch_and_bound_t& bnb, deterministic_diving_worker_t& worker, - std::deque*>& stack, + circular_deque_t*>& stack, i_t max_backtrack_depth) : base(bnb, worker), stack(stack), max_backtrack_depth(max_backtrack_depth) { @@ -1115,25 +1108,28 @@ struct deterministic_diving_policy_t const std::vector& fractional, const std::vector& x) override { + logger_t log; + log.log = false; + switch (this->worker.diving_type) { case search_strategy_t::PSEUDOCOST_DIVING: - return this->worker.variable_selection_from_snapshot(fractional, x); + return pseudocost_diving( + this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); case search_strategy_t::LINE_SEARCH_DIVING: - if (this->worker.root_solution) { - logger_t log; - log.log = false; - return line_search_diving(fractional, x, *this->worker.root_solution, log); - } - return this->worker.variable_selection_from_snapshot(fractional, x); + return line_search_diving(fractional, x, *this->worker.root_solution, log); case search_strategy_t::GUIDED_DIVING: - return this->worker.guided_variable_selection(fractional, x); + if 
(this->worker.incumbent_snapshot.empty()) { + return pseudocost_diving( + this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log); + } else { + return guided_diving( + this->worker.pc_snapshot, fractional, x, this->worker.incumbent_snapshot, log); + } case search_strategy_t::COEFFICIENT_DIVING: { - logger_t log; - log.log = false; - return coefficient_diving(this->bnb.original_lp_, + return coefficient_diving(this->worker.leaf_problem, fractional, x, this->bnb.var_up_locks_, @@ -1141,7 +1137,7 @@ struct deterministic_diving_policy_t log); } - default: return this->worker.variable_selection_from_snapshot(fractional, x); + default: CUOPT_LOG_ERROR("Invalid diving method!"); return {-1, branch_direction_t::NONE}; } } @@ -1153,10 +1149,10 @@ struct deterministic_diving_policy_t void on_node_completed(mip_node_t* node, node_status_t status, - rounding_direction_t dir) override + branch_direction_t dir) override { if (status == node_status_t::HAS_CHILDREN) { - if (dir == rounding_direction_t::UP) { + if (dir == branch_direction_t::UP) { stack.push_front(node->get_down_child()); stack.push_front(node->get_up_child()); } else { @@ -1175,7 +1171,7 @@ struct deterministic_diving_policy_t template template -std::pair branch_and_bound_t::update_tree_impl( +std::pair branch_and_bound_t::update_tree_impl( mip_node_t* node_ptr, search_tree_t& search_tree, WorkerT* worker, @@ -1187,7 +1183,10 @@ std::pair branch_and_bound_t::upd lp_solution_t& leaf_solution = worker->leaf_solution; const f_t upper_bound = policy.upper_bound(); node_status_t status = node_status_t::PENDING; - rounding_direction_t round_dir = rounding_direction_t::NONE; + branch_direction_t round_dir = branch_direction_t::NONE; + + worker->recompute_basis = true; + worker->recompute_bounds = true; if (lp_status == dual::status_t::DUAL_UNBOUNDED) { node_ptr->lower_bound = inf; @@ -1243,11 +1242,13 @@ std::pair branch_and_bound_t::upd policy.select_branch_variable(node_ptr, leaf_fractional, 
leaf_solution.x); round_dir = dir; - assert(node_ptr->vstatus.size() == leaf_problem.num_cols); + assert(worker->leaf_vstatus.size() == leaf_problem.num_cols); assert(branch_var >= 0); - assert(dir != rounding_direction_t::NONE); + assert(dir != branch_direction_t::NONE); policy.update_objective_estimate(node_ptr, leaf_fractional, leaf_solution.x); + worker->recompute_basis = false; + worker->recompute_bounds = false; logger_t log; log.log = false; @@ -1255,7 +1256,7 @@ std::pair branch_and_bound_t::upd branch_var, leaf_solution.x[branch_var], num_frac, - node_ptr->vstatus, + worker->leaf_vstatus, leaf_problem, log); search_tree.update(node_ptr, node_status_t::HAS_CHILDREN); @@ -1284,7 +1285,7 @@ std::pair branch_and_bound_t::upd } template -std::pair branch_and_bound_t::update_tree( +std::pair branch_and_bound_t::update_tree( mip_node_t* node_ptr, search_tree_t& search_tree, branch_and_bound_worker_t* worker, @@ -1338,8 +1339,9 @@ dual::status_t branch_and_bound_t::solve_node_lp( } #endif - std::vector& leaf_vstatus = node_ptr->vstatus; - assert(leaf_vstatus.size() == worker->leaf_problem.num_cols); + worker->leaf_vstatus = + decompress_vstatus(node_ptr->packed_vstatus, worker->leaf_problem.num_cols); + assert(worker->leaf_vstatus.size() == worker->leaf_problem.num_cols); simplex_solver_settings_t lp_settings = settings_; lp_settings.concurrent_halt = &node_concurrent_halt_; @@ -1377,7 +1379,7 @@ dual::status_t branch_and_bound_t::solve_node_lp( node_ptr->node_id, node_ptr->depth, node_ptr->branch_var, - node_ptr->branch_dir == rounding_direction_t::DOWN ? "DOWN" : "UP", + node_ptr->branch_dir == branch_direction_t::DOWN ? 
"DOWN" : "UP", node_ptr->fractional_val, node_ptr->branch_var_lower, node_ptr->branch_var_upper, @@ -1398,7 +1400,7 @@ dual::status_t branch_and_bound_t::solve_node_lp( lp_start_time, worker->leaf_problem, lp_settings, - leaf_vstatus, + worker->leaf_vstatus, worker->basis_factors, worker->basic_list, worker->nonbasic_list, @@ -1415,7 +1417,7 @@ dual::status_t branch_and_bound_t::solve_node_lp( worker->basis_factors, worker->basic_list, worker->nonbasic_list, - leaf_vstatus, + worker->leaf_vstatus, worker->leaf_edge_norms); lp_status = convert_lp_status_to_dual_status(second_status); @@ -1431,11 +1433,19 @@ dual::status_t branch_and_bound_t::solve_node_lp( return lp_status; } + template -void branch_and_bound_t::plunge_with(branch_and_bound_worker_t* worker) +void branch_and_bound_t::plunge_with(bfs_worker_t* worker, + mip_node_t* start_node) { - std::deque*> stack; - stack.push_front(worker->start_node); + assert(worker != nullptr && worker->is_active.load()); + assert(start_node != nullptr); + + // Stack holds at most 2 entries: the preferred child + its sibling. + // The sibling is evicted to the queue before a new pair of children is added. 
+ circular_deque_t*> stack(4); + stack.push_front(start_node); + worker->recompute_basis = true; worker->recompute_bounds = true; @@ -1446,6 +1456,36 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t 0 && (solver_status_ == mip_status_t::UNSET && is_running_) && rel_gap > settings_.relative_mip_gap_tol && abs_gap > settings_.absolute_mip_gap_tol) { + if (worker->worker_id == 0) { repair_heuristic_solutions(); } + + // Launch a new diving task if any worker is idle + if (worker->total_active_diving_workers < worker->total_max_diving_workers && + worker->node_queue.diving_queue_size() > 0) { + launch_diving_worker(worker); + } + + // If any best-first worker become idle, + if (bfs_worker_pool_.num_idle_workers() > 0 && worker->node_queue.best_first_queue_size() > 0) { + worker->node_queue.lock(); + mip_node_t* node = worker->node_queue.pop_best_first(); + + // We need to temporarily save the lower bound in this worker so it is + // considered when calculating the global lower bound. + f_t node_lower_bound = node ? 
node->lower_bound : std::numeric_limits::infinity(); + worker->lower_bound = std::min(worker->lower_bound.load(), node_lower_bound); + + worker->node_queue.unlock(); + + if (node != nullptr) { + if (!launch_bfs_worker({node})) { + worker->node_queue.lock(); + worker->node_queue.push(node); + worker->node_queue.unlock(); + } + } + } + + assert(stack.size() <= 2); mip_node_t* node_ptr = stack.front(); stack.pop_front(); @@ -1455,7 +1495,7 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_tlower_bound = node_ptr->lower_bound; + worker->lower_bound = std::min(worker->node_queue.get_lower_bound(), node_ptr->lower_bound); if (node_ptr->lower_bound > upper_bound_.load()) { search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); @@ -1466,13 +1506,31 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t settings_.time_limit) { + f_t now = toc(exploration_stats_.start_time); + + if (worker->worker_id == 0) { + f_t time_since_last_log = + exploration_stats_.last_log == 0 ? 
1.0 : toc(exploration_stats_.last_log); + i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; + + if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + time_since_last_log >= 1) || + (time_since_last_log > 30) || now > settings_.time_limit) { + report(' ', upper_bound_, lower_bound, node_ptr->depth, node_ptr->integer_infeasible); + exploration_stats_.last_log = tic(); + exploration_stats_.nodes_since_last_log = 0; + } + } + + if (now > settings_.time_limit) { solver_status_ = mip_status_t::TIME_LIMIT; + stack.push_front(node_ptr); break; } if (exploration_stats_.nodes_explored >= settings_.node_limit) { solver_status_ = mip_status_t::NODE_LIMIT; + stack.push_front(node_ptr); break; } @@ -1480,11 +1538,17 @@ void branch_and_bound_t::plunge_with(branch_and_bound_worker_t::plunge_with(branch_and_bound_worker_t 0) { mip_node_t* node = stack.back(); stack.pop_back(); - node_queue_.push(node); + worker->node_queue.lock(); + worker->node_queue.push(node); + worker->node_queue.unlock(); } exploration_stats_.nodes_unexplored += 2; - if (round_dir == rounding_direction_t::UP) { - if (node_queue_.best_first_queue_size() < min_node_queue_size_) { - node_queue_.push(node_ptr->get_down_child()); + if (round_dir == branch_direction_t::UP) { + if (worker->node_queue.best_first_queue_size() < min_node_queue_size_) { + worker->node_queue.lock(); + worker->node_queue.push(node_ptr->get_down_child()); + worker->node_queue.unlock(); } else { stack.push_front(node_ptr->get_down_child()); } stack.push_front(node_ptr->get_up_child()); } else { - if (node_queue_.best_first_queue_size() < min_node_queue_size_) { - node_queue_.push(node_ptr->get_up_child()); + if (worker->node_queue.best_first_queue_size() < min_node_queue_size_) { + worker->node_queue.lock(); + worker->node_queue.push(node_ptr->get_up_child()); + worker->node_queue.unlock(); } else { stack.push_front(node_ptr->get_up_child()); } @@ -1536,31 +1606,115 @@ void 
branch_and_bound_t::plunge_with(branch_and_bound_worker_tnode_queue.lock(); + worker->node_queue.push(node); + worker->node_queue.unlock(); + } +} - if (stack.size() > 0 && - (rel_gap <= settings_.relative_mip_gap_tol || abs_gap <= settings_.absolute_mip_gap_tol)) { - // If the solver converged according to the gap rules, but we still have nodes to explore - // in the stack, then we should add all the pending nodes back to the heap so the lower - // bound of the solver is set to the correct value. - while (!stack.empty()) { - auto node = stack.front(); - stack.pop_front(); - node_queue_.push(node); - } +template +bfs_worker_t* branch_and_bound_t::launch_bfs_worker( + const std::vector*>& start_nodes) +{ + // Take an idle node from the pool + bfs_worker_t* idle_worker = bfs_worker_pool_.pop_idle_worker(); + if (!idle_worker) { return nullptr; } + + if (toc(exploration_stats_.start_time) > settings_.time_limit || + solver_status_ != mip_status_t::UNSET) { + bfs_worker_pool_.return_worker_to_pool(idle_worker); + return nullptr; } - if (settings_.num_threads > 1) { - worker_pool_.return_worker_to_pool(worker); - active_workers_per_strategy_[BEST_FIRST]--; + idle_worker->init(start_nodes); + idle_worker->set_active(); + +#pragma omp task affinity(*idle_worker) priority(99) + best_first_search_with(idle_worker); + + return idle_worker; +} + +template +void branch_and_bound_t::best_first_search_with(bfs_worker_t* worker) +{ + f_t lower_bound = get_lower_bound(); + f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + f_t steal_chance = settings_.bnb_steal_chance >= 0 ? 
settings_.bnb_steal_chance : 0.05; + + worker->calculate_num_diving_workers(bfs_worker_pool_.num_workers(), + diving_worker_pool_.num_workers(), + has_solver_space_incumbent(), + settings_.diving_settings); + + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && + worker->node_queue.best_first_queue_size() > 0) { + // If the guided diving was disabled previously due to the lack of an incumbent solution, + // re-enable as soon as a new incumbent is found. + if (diving_worker_pool_.num_workers() > 0 && settings_.diving_settings.guided_diving != 0 && + worker->max_diving_workers[GUIDED_DIVING] == 0) { + if (has_solver_space_incumbent()) { + worker->calculate_num_diving_workers(bfs_worker_pool_.num_workers(), + diving_worker_pool_.num_workers(), + has_solver_space_incumbent(), + settings_.diving_settings); + } + } + + worker->node_queue.lock(); + mip_node_t* start_node = worker->node_queue.pop_best_first(); + if (!start_node) { + worker->node_queue.unlock(); + continue; + } + + worker->lower_bound = start_node->lower_bound; + worker->node_queue.unlock(); + + if (upper_bound_.load() < start_node->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree_.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); + search_tree_.update(start_node, node_status_t::FATHOMED); + continue; + } + + plunge_with(worker, start_node); + + lower_bound = get_lower_bound(); + abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); + rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { + node_concurrent_halt_ = 1; + solver_status_ = mip_status_t::OPTIMAL; + break; + } + + // Steal a node with some probability or when it is empty. 
The victim is determined at random. + if (worker->node_queue.best_first_queue_size() == 0 || + worker->rng.next_double() < steal_chance) { + for (i_t i = 0; i < settings_.bnb_max_steal_attempts; ++i) { + i_t k = worker->rng.uniform(0, bfs_worker_pool_.num_workers()); + if (worker->steal_node_from(bfs_worker_pool_[k], settings_.bnb_nodes_per_steal)) { break; } + } + } } + + worker->set_inactive(); + bfs_worker_pool_.return_worker_to_pool(worker); } template -void branch_and_bound_t::dive_with(branch_and_bound_worker_t* worker) +void branch_and_bound_t::dive_with(diving_worker_t* worker) { raft::common::nvtx::range scope("BB::diving_thread"); logger_t log; @@ -1573,8 +1727,8 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t worker->recompute_basis = true; worker->recompute_bounds = true; - search_tree_t dive_tree(std::move(*worker->start_node)); - std::deque*> stack; + search_tree_t dive_tree(std::move(worker->start_node)); + circular_deque_t*> stack(2 * diving_backtrack_limit + 4); stack.push_front(&dive_tree.root); branch_and_bound_stats_t dive_stats; @@ -1609,11 +1763,9 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t if (lp_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; break; - } else if (lp_status == dual::status_t::CONCURRENT_LIMIT) { - break; - } else if (lp_status == dual::status_t::ITERATION_LIMIT) { - break; } + if (lp_status == dual::status_t::CONCURRENT_LIMIT) { break; } + if (lp_status == dual::status_t::ITERATION_LIMIT) { break; } ++dive_stats.nodes_explored; @@ -1623,7 +1775,7 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t worker->recompute_bounds = node_status != node_status_t::HAS_CHILDREN; if (node_status == node_status_t::HAS_CHILDREN) { - if (round_dir == rounding_direction_t::UP) { + if (round_dir == branch_direction_t::UP) { stack.push_front(node_ptr->get_down_child()); stack.push_front(node_ptr->get_up_child()); } else { @@ -1632,7 +1784,7 @@ void 
branch_and_bound_t::dive_with(branch_and_bound_worker_t } } - // Remove nodes that we no longer can backtrack to (i.e., from the current node, we can only + // Remove nodes that we can no longer backtrack to (i.e., from the current node, we can only // backtrack to a node that is has a depth of at most 5 levels lower than the current node). if (stack.size() > 1 && stack.front()->depth - stack.back()->depth > diving_backtrack_limit) { stack.pop_back(); @@ -1644,219 +1796,80 @@ void branch_and_bound_t::dive_with(branch_and_bound_worker_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound, lower_bound); } - worker_pool_.return_worker_to_pool(worker); - active_workers_per_strategy_[search_strategy]--; + worker->set_inactive(); + diving_worker_pool_.return_worker_to_pool(worker); } template -void branch_and_bound_t::run_scheduler() +bool branch_and_bound_t::launch_diving_worker(bfs_worker_t* bfs_worker) { - diving_heuristics_settings_t diving_settings = settings_.diving_settings; - const i_t num_workers = 2 * settings_.num_threads; - - if (!has_solver_space_incumbent()) { diving_settings.guided_diving = false; } - std::vector strategies = get_search_strategies(diving_settings); - std::array max_num_workers_per_type = - get_max_workers(num_workers, strategies); + // Get an idle worker. 
+ diving_worker_t* diving_worker = diving_worker_pool_.pop_idle_worker(); + if (diving_worker == nullptr) { return false; } - worker_pool_.init(num_workers, original_lp_, Arow_, var_types_, settings_); - active_workers_per_strategy_.fill(0); + bfs_worker->node_queue.lock(); + mip_node_t* start_node = bfs_worker->node_queue.pop_diving(); -#ifdef CUOPT_LOG_DEBUG - for (auto strategy : strategies) { - settings_.log.debug("%c%d: max num of workers = %d", - feasible_solution_symbol(strategy), - strategy, - max_num_workers_per_type[strategy]); + if (!start_node || upper_bound_.load() < start_node->lower_bound || + start_node->depth < settings_.diving_settings.min_node_depth) { + bfs_worker->node_queue.unlock(); + diving_worker_pool_.return_worker_to_pool(diving_worker); + return false; } -#endif - - f_t lower_bound = get_lower_bound(); - f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - i_t last_node_depth = 0; - i_t last_int_infeas = 0; - - while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && - rel_gap > settings_.relative_mip_gap_tol && - (active_workers_per_strategy_[0] > 0 || node_queue_.best_first_queue_size() > 0)) { - bool launched_any_task = false; - - repair_heuristic_solutions(); - - // If the guided diving was disabled previously due to the lack of an incumbent solution, - // re-enable as soon as a new incumbent is found. 
- if (settings_.diving_settings.guided_diving != diving_settings.guided_diving) { - if (has_solver_space_incumbent()) { - diving_settings.guided_diving = settings_.diving_settings.guided_diving; - strategies = get_search_strategies(diving_settings); - max_num_workers_per_type = get_max_workers(num_workers, strategies); - -#ifdef CUOPT_LOG_DEBUG - for (auto type : strategies) { - settings_.log.debug("%c%d: max num of workers = %d", - feasible_solution_symbol(type), - type, - max_num_workers_per_type[type]); - } -#endif - } - } - - f_t now = toc(exploration_stats_.start_time); - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); - i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; - - if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && - time_since_last_log >= 1) || - (time_since_last_log > 30) || now > settings_.time_limit) { - i_t queue_size = node_queue_.best_first_queue_size(); - i_t depth = queue_size > 0 ? node_queue_.bfs_top()->depth : last_node_depth; - i_t int_infeas = queue_size > 0 ? node_queue_.bfs_top()->integer_infeasible : last_int_infeas; - report(' ', upper_bound_, lower_bound, depth, int_infeas); - exploration_stats_.last_log = tic(); - exploration_stats_.nodes_since_last_log = 0; - } - - if (now > settings_.time_limit) { - solver_status_ = mip_status_t::TIME_LIMIT; - break; - } - - for (auto strategy : strategies) { - if (active_workers_per_strategy_[strategy] >= max_num_workers_per_type[strategy]) { - continue; - } - - // Get an idle worker. - branch_and_bound_worker_t* worker = worker_pool_.get_idle_worker(); - if (worker == nullptr) { break; } - - if (strategy == BEST_FIRST) { - // If there any node left in the heap, we pop the top node and explore it. 
- std::optional*> start_node = node_queue_.pop_best_first(); - if (!start_node.has_value()) { continue; } - if (upper_bound_.load() < start_node.value()->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the - // current upper bound - search_tree_.graphviz_node( - settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); - search_tree_.update(start_node.value(), node_status_t::FATHOMED); - continue; - } + diving_worker->init(start_node, original_lp_); + bfs_worker->node_queue.unlock(); - // Remove the worker from the idle list. - worker_pool_.pop_idle_worker(); - worker->init_best_first(start_node.value(), original_lp_); - last_node_depth = start_node.value()->depth; - last_int_infeas = start_node.value()->integer_infeasible; - active_workers_per_strategy_[strategy]++; - launched_any_task = true; + bool is_feasible = diving_worker->presolve_start_bounds(settings_); + if (!is_feasible) { + diving_worker_pool_.return_worker_to_pool(diving_worker); + return false; + } -#pragma omp task affinity(worker) - plunge_with(worker); + if (toc(exploration_stats_.start_time) > settings_.time_limit || + solver_status_ != mip_status_t::UNSET) { + diving_worker_pool_.return_worker_to_pool(diving_worker); + return false; + } - } else { - std::optional*> start_node = node_queue_.pop_diving(); + for (int i = 1; i < num_search_strategies; ++i) { + auto strategy = search_strategies[i]; - if (!start_node.has_value()) { continue; } - if (upper_bound_.load() < start_node.value()->lower_bound || - start_node.value()->depth < diving_settings.min_node_depth) { - continue; - } + if (bfs_worker->active_diving_workers[strategy] < bfs_worker->max_diving_workers[strategy]) { + diving_worker->search_strategy = strategy; + diving_worker->bfs_worker = bfs_worker; + diving_worker->set_active(); + bfs_worker->active_diving_workers[strategy]++; + bfs_worker->total_active_diving_workers++; - bool is_feasible = - 
worker->init_diving(start_node.value(), strategy, original_lp_, settings_); - if (!is_feasible) { continue; } + assert(bfs_worker->active_diving_workers[strategy].load() <= + bfs_worker->max_diving_workers[strategy]); + assert(bfs_worker->total_active_diving_workers.load() <= + bfs_worker->total_max_diving_workers); - // Remove the worker from the idle list. - worker_pool_.pop_idle_worker(); - active_workers_per_strategy_[strategy]++; - launched_any_task = true; +#pragma omp task affinity(*diving_worker) + dive_with(diving_worker); -#pragma omp task affinity(worker) - dive_with(worker); - } + return true; } - - lower_bound = get_lower_bound(); - abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - - if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { - node_concurrent_halt_ = 1; - solver_status_ = mip_status_t::OPTIMAL; - break; - } - - // If no new task was launched in this iteration, suspend temporarily the - // execution of the scheduler. As of 8/Jan/2026, GCC does not - // implement taskyield, but LLVM does. 
- if (!launched_any_task) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } } + + diving_worker_pool_.return_worker_to_pool(diving_worker); + return false; } template void branch_and_bound_t::single_threaded_solve() { - worker_pool_.init(1, original_lp_, Arow_, var_types_, settings_); - branch_and_bound_worker_t* worker = worker_pool_.get_idle_worker(); - - f_t lower_bound = get_lower_bound(); - f_t abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - - while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && - rel_gap > settings_.relative_mip_gap_tol && node_queue_.best_first_queue_size() > 0) { - repair_heuristic_solutions(); - - f_t now = toc(exploration_stats_.start_time); - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); - i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; - - if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && - time_since_last_log >= 1) || - (time_since_last_log > 30) || now > settings_.time_limit) { - i_t depth = node_queue_.bfs_top()->depth; - i_t int_infeas = node_queue_.bfs_top()->integer_infeasible; - report(' ', upper_bound_, lower_bound, depth, int_infeas); - exploration_stats_.last_log = tic(); - exploration_stats_.nodes_since_last_log = 0; - } - - if (now > settings_.time_limit) { - solver_status_ = mip_status_t::TIME_LIMIT; - break; - } - - // If there any node left in the heap, we pop the top node and explore it. 
- std::optional*> start_node = node_queue_.pop_best_first(); - - if (!start_node.has_value()) { continue; } - if (upper_bound_.load() < start_node.value()->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the - // current upper bound - search_tree_.graphviz_node( - settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); - search_tree_.update(start_node.value(), node_status_t::FATHOMED); - continue; - } - - worker->init_best_first(start_node.value(), original_lp_); - plunge_with(worker); - - lower_bound = get_lower_bound(); - abs_gap = compute_user_abs_gap(original_lp_, upper_bound_.load(), lower_bound); - rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); - - if (abs_gap <= settings_.absolute_mip_gap_tol || rel_gap <= settings_.relative_mip_gap_tol) { - solver_status_ = mip_status_t::OPTIMAL; - break; - } - } + bfs_worker_pool_.init(1, original_lp_, Arow_, var_types_, settings_); + bfs_worker_t* worker = bfs_worker_pool_.pop_idle_worker(); + + node_queue_t& node_queue = worker->node_queue; + node_queue.push(search_tree_.root.get_down_child()); + node_queue.push(search_tree_.root.get_up_child()); + worker->lower_bound = worker->node_queue.get_lower_bound(); + worker->set_active(); + best_first_search_with(worker); } template @@ -1894,7 +1907,6 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( while (!root_crossover_solution_set_.load(std::memory_order_acquire) && *get_root_concurrent_halt() == 0) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); - continue; } if (root_crossover_solution_set_.load(std::memory_order_acquire)) { @@ -2507,7 +2519,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); pc_.resize(original_lp_.num_cols); - original_lp_.A.transpose(pc_.AT); + original_lp_.A.transpose(*pc_.AT); { raft::common::nvtx::range scope_sb("BB::strong_branching"); 
strong_branching(original_lp_, @@ -2578,7 +2590,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } // Choose variable to branch on - i_t branch_var = pc_.variable_selection(fractional, root_relax_soln_.x, log); + i_t branch_var = pc_.variable_selection(fractional, root_relax_soln_.x); search_tree_.root = std::move(mip_node_t(root_objective_, root_vstatus_)); search_tree_.num_nodes = 0; @@ -2590,8 +2602,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_vstatus_, original_lp_, log); - node_queue_.push(search_tree_.root.get_down_child()); - node_queue_.push(search_tree_.root.get_up_child()); settings_.log.printf("Exploring the B&B tree using %d threads\n\n", settings_.num_threads); node_concurrent_halt_ = 0; @@ -2600,7 +2610,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut exploration_stats_.nodes_unexplored = 2; exploration_stats_.nodes_since_last_log = 0; exploration_stats_.last_log = tic(); - min_node_queue_size_ = 2 * settings_.num_threads; + min_node_queue_size_ = 20; if (settings_.diving_settings.coefficient_diving != 0) { calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_); @@ -2618,10 +2628,24 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (settings_.deterministic) { run_deterministic_coordinator(Arow_); } else if (settings_.num_threads > 1) { + const i_t num_workers = settings_.num_threads; + const i_t num_bfs_workers = std::max(settings_.num_threads / 2, 1); + const i_t num_diving_workers = num_workers - num_bfs_workers; + bfs_worker_pool_.init(num_bfs_workers, original_lp_, Arow_, var_types_, settings_); + diving_worker_pool_.init( + num_diving_workers, original_lp_, Arow_, var_types_, settings_, num_bfs_workers); + #pragma omp parallel num_threads(settings_.num_threads) { #pragma omp master - run_scheduler(); + { + if (num_bfs_workers > 1) { + launch_bfs_worker({search_tree_.root.get_up_child()}); + launch_bfs_worker({search_tree_.root.get_down_child()}); + } else 
{ + launch_bfs_worker({search_tree_.root.get_up_child(), search_tree_.root.get_down_child()}); + } + } } } else { single_threaded_solve(); @@ -2635,31 +2659,33 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut lower_bound = deterministic_compute_lower_bound(); solver_status_ = deterministic_global_termination_status_; } else { - if (node_queue_.best_first_queue_size() > 0) { + lower_bound = std::numeric_limits::infinity(); + + for (int i = 0; i < bfs_worker_pool_.num_workers(); ++i) { + bfs_worker_t* worker = bfs_worker_pool_[i]; + // We need to clear the queue and use the info in the search tree for the lower bound - while (node_queue_.best_first_queue_size() > 0) { - std::optional*> start_node = node_queue_.pop_best_first(); + while (worker->node_queue.best_first_queue_size() > 0) { + mip_node_t* start_node = worker->node_queue.pop_best_first(); - if (!start_node.has_value()) { continue; } - if (upper_bound_.load() < start_node.value()->lower_bound) { + if (!start_node) { continue; } + if (upper_bound_.load() < start_node->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound - search_tree_.graphviz_node( - settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); - search_tree_.update(start_node.value(), node_status_t::FATHOMED); - continue; + search_tree_.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); + search_tree_.update(start_node, node_status_t::FATHOMED); } else { - node_queue_.push( - start_node.value()); // Needed to ensure we don't lose the correct lower bound + // Needed to ensure we don't lose the correct lower bound + worker->node_queue.push(start_node); + lower_bound = std::min(lower_bound, worker->node_queue.get_lower_bound()); break; } } - lower_bound = node_queue_.best_first_queue_size() > 0 ? 
node_queue_.get_lower_bound() - : search_tree_.root.lower_bound; - } else { - lower_bound = search_tree_.root.lower_bound; } + + if (!std::isfinite(lower_bound)) { lower_bound = search_tree_.root.lower_bound; } } + set_final_solution(solution, lower_bound); return solver_status_; } @@ -2788,17 +2814,9 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri deterministic_horizon_step_ = 0.50; // Compute worker counts using the same formula as reliability-branching scheduler - const i_t num_workers = 2 * settings_.num_threads; - std::vector search_strategies = - get_search_strategies(settings_.diving_settings); - std::array max_num_workers = - get_max_workers(num_workers, search_strategies); - - const int num_bfs_workers = max_num_workers[search_strategy_t::BEST_FIRST]; - int num_diving_workers = 0; - for (size_t i = 1; i < search_strategies.size(); ++i) { - num_diving_workers += max_num_workers[search_strategies[i]]; - } + const i_t num_workers = settings_.num_threads; + const i_t num_bfs_workers = std::max(num_workers / 2, 1); + const i_t num_diving_workers = num_workers - num_bfs_workers; deterministic_mode_enabled_ = true; deterministic_current_horizon_ = deterministic_horizon_step_; @@ -3197,10 +3215,10 @@ node_status_t branch_and_bound_t::solve_node_deterministic( // Solve LP relaxation worker.leaf_solution.resize(worker.leaf_problem.num_rows, worker.leaf_problem.num_cols); - std::vector& leaf_vstatus = node_ptr->vstatus; - i_t node_iter = 0; - f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; + worker.leaf_vstatus = decompress_vstatus(node_ptr->packed_vstatus, worker.leaf_problem.num_cols); + i_t node_iter = 0; + f_t lp_start_time = tic(); + std::vector leaf_edge_norms = edge_norms_; dual::status_t lp_status = dual_phase2_with_advanced_basis(2, 0, @@ -3208,7 +3226,7 @@ node_status_t branch_and_bound_t::solve_node_deterministic( lp_start_time, worker.leaf_problem, lp_settings, - leaf_vstatus, + worker.leaf_vstatus, 
worker.basis_factors, worker.basic_list, worker.nonbasic_list, @@ -3226,7 +3244,7 @@ node_status_t branch_and_bound_t::solve_node_deterministic( worker.basis_factors, worker.basic_list, worker.nonbasic_list, - leaf_vstatus, + worker.leaf_vstatus, leaf_edge_norms, &worker.work_context); lp_status = convert_lp_status_to_dual_status(second_status); @@ -3322,11 +3340,12 @@ template void branch_and_bound_t::deterministic_broadcast_snapshots( PoolT& pool, const std::vector& incumbent_snapshot) { - deterministic_snapshot_t snap; - snap.upper_bound = upper_bound_.load(); - snap.total_lp_iters = exploration_stats_.total_lp_iters.load(); - snap.incumbent = incumbent_snapshot; - snap.pc_snapshot = pc_.create_snapshot(); + deterministic_snapshot_t snap{ + .upper_bound = upper_bound_, + .pc_snapshot = pc_, + .incumbent = incumbent_snapshot, + .total_lp_iters = exploration_stats_.total_lp_iters, + }; for (auto& worker : pool) { worker.set_snapshots(snap); @@ -3683,8 +3702,10 @@ void branch_and_bound_t::deterministic_assign_diving_nodes() continue; // this worker is full, try next one } - auto entry = diving_heap_.pop(); - if (entry.has_value()) { worker.enqueue_dive_node(entry.value().node, original_lp_); } + if (!diving_heap_.empty()) { + auto entry = diving_heap_.pop(); + worker.enqueue_dive_node(entry.node, original_lp_); + } } diving_heap_.clear(); @@ -3734,11 +3755,6 @@ void branch_and_bound_t::deterministic_dive( { raft::common::nvtx::range scope("BB::deterministic_dive"); - // Create local search tree for the dive - search_tree_t dive_tree(std::move(entry.node)); - std::deque*> stack; - stack.push_front(&dive_tree.root); - worker.dive_lower = std::move(entry.resolved_lower); worker.dive_upper = std::move(entry.resolved_upper); @@ -3748,6 +3764,11 @@ void branch_and_bound_t::deterministic_dive( worker.lp_iters_this_dive = 0; worker.recompute_bounds_and_basis = true; + // Create local search tree for the dive + search_tree_t dive_tree(std::move(entry.node)); + 
circular_deque_t*> stack(2 * max_backtrack_depth + 4); + stack.push_front(&dive_tree.root); + while (!stack.empty() && deterministic_global_termination_status_ == mip_status_t::UNSET && nodes_this_dive < max_nodes_per_dive) { mip_node_t* node_ptr = stack.front(); @@ -3808,18 +3829,19 @@ void branch_and_bound_t::deterministic_dive( // Solve LP relaxation worker.leaf_solution.resize(worker.leaf_problem.num_rows, worker.leaf_problem.num_cols); - std::vector& leaf_vstatus = node_ptr->vstatus; - i_t node_iter = 0; - f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; + i_t node_iter = 0; + f_t lp_start_time = tic(); + std::vector leaf_edge_norms = edge_norms_; + worker.leaf_vstatus = + decompress_vstatus(node_ptr->packed_vstatus, worker.leaf_problem.num_cols); dual::status_t lp_status = dual_phase2_with_advanced_basis(2, 0, worker.recompute_bounds_and_basis, lp_start_time, worker.leaf_problem, lp_settings, - leaf_vstatus, + worker.leaf_vstatus, worker.basis_factors, worker.basic_list, worker.nonbasic_list, @@ -3836,7 +3858,7 @@ void branch_and_bound_t::deterministic_dive( worker.basis_factors, worker.basic_list, worker.nonbasic_list, - leaf_vstatus, + worker.leaf_vstatus, leaf_edge_norms, &worker.work_context); lp_status = convert_lp_status_to_dual_status(second_status); diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index f2917ba930..254305f574 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -8,12 +8,12 @@ #pragma once #include -#include #include -#include #include #include #include +#include +#include #include @@ -235,18 +235,14 @@ class branch_and_bound_t { // Pseudocosts pseudo_costs_t pc_; - // Heap storing the nodes waiting to be explored. - node_queue_t node_queue_; - // Search tree search_tree_t search_tree_; - // Count the number of workers per type that either are being executed or - // are waiting to be executed. 
- std::array, num_search_strategies> active_workers_per_strategy_; + // Worker pool dedicated to the best-first search + bfs_worker_pool_t bfs_worker_pool_; - // Worker pool - branch_and_bound_worker_pool_t worker_pool_; + // Worker pool dedicated to diving + diving_worker_pool_t diving_worker_pool_; // Global status of the solver. omp_atomic_t solver_status_; @@ -288,21 +284,26 @@ class branch_and_bound_t { // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); + // Launch a new bfs worker initialized from the `start_node`. + bfs_worker_t* launch_bfs_worker(const std::vector*>&); + // Launch a new diving worker from a given bfs worker. The dive will start + // from the node at the top of the local heap. + bool launch_diving_worker(bfs_worker_t* bfs_worker); + + // Perform best-first search with a given bfs worker. + void best_first_search_with(bfs_worker_t* worker); + // We use best-first to pick the `start_node` and then perform a depth-first search // from this node (i.e., a plunge). It can only backtrack to a sibling node. // Unexplored nodes in the subtree are inserted back into the global heap. - void plunge_with(branch_and_bound_worker_t* worker); + void plunge_with(bfs_worker_t* worker, mip_node_t* start_node); // Perform a deep dive in the subtree determined by the `start_node` in order // to find integer feasible solutions. - void dive_with(branch_and_bound_worker_t* worker); - - // Run the scheduler whose will schedule and manage - // all the other workers. - void run_scheduler(); + void dive_with(diving_worker_t* worker); // Run the branch-and-bound algorithm in single threaded mode. - // This disable all diving heuristics. + // This disables all diving heuristics. void single_threaded_solve(); // Solve the LP relaxation of a leaf node @@ -318,7 +319,7 @@ class branch_and_bound_t { // Policy-based tree update shared between opportunistic and deterministic codepaths. 
template - std::pair update_tree_impl( + std::pair update_tree_impl( mip_node_t* node_ptr, search_tree_t& search_tree, WorkerT* worker, @@ -326,7 +327,7 @@ class branch_and_bound_t { Policy& policy); // Opportunistic tree update wrapper. - std::pair update_tree( + std::pair update_tree( mip_node_t* node_ptr, search_tree_t& search_tree, branch_and_bound_worker_t* worker, diff --git a/cpp/src/branch_and_bound/branch_and_bound_worker.hpp b/cpp/src/branch_and_bound/branch_and_bound_worker.hpp deleted file mode 100644 index 4de2b43cae..0000000000 --- a/cpp/src/branch_and_bound/branch_and_bound_worker.hpp +++ /dev/null @@ -1,281 +0,0 @@ -/* clang-format off */ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ -/* clang-format on */ - -#pragma once - -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include - -namespace cuopt::linear_programming::dual_simplex { - -constexpr int num_search_strategies = 5; - -// Indicate the search and variable selection algorithms used by each thread -// in B&B (See [1]). -// -// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, -// Berlin, 2007. doi: 10.14279/depositonce-1634. -enum search_strategy_t : int { - BEST_FIRST = 0, // Best-First + Plunging. - PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) - LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) - GUIDED_DIVING = 3, // Guided diving (9.2.3). 
- COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) -}; - -template -struct branch_and_bound_stats_t { - f_t start_time = 0.0; - omp_atomic_t total_lp_solve_time = 0.0; - omp_atomic_t nodes_explored = 0; - omp_atomic_t nodes_unexplored = 0; - omp_atomic_t total_lp_iters = 0; - omp_atomic_t nodes_since_last_log = 0; - omp_atomic_t last_log = 0.0; -}; - -template -class branch_and_bound_worker_t { - public: - const i_t worker_id; - omp_atomic_t search_strategy; - omp_atomic_t is_active; - omp_atomic_t lower_bound; - - lp_problem_t leaf_problem; - lp_solution_t leaf_solution; - std::vector leaf_edge_norms; - - basis_update_mpf_t basis_factors; - std::vector basic_list; - std::vector nonbasic_list; - - bounds_strengthening_t node_presolver; - std::vector bounds_changed; - - std::vector start_lower; - std::vector start_upper; - mip_node_t* start_node; - - pcgenerator_t rng; - - bool recompute_basis = true; - bool recompute_bounds = true; - - branch_and_bound_worker_t(i_t worker_id, - const lp_problem_t& original_lp, - const csr_matrix_t& Arow, - const std::vector& var_type, - const simplex_solver_settings_t& settings) - : worker_id(worker_id), - search_strategy(BEST_FIRST), - is_active(false), - lower_bound(-std::numeric_limits::infinity()), - leaf_problem(original_lp), - leaf_solution(original_lp.num_rows, original_lp.num_cols), - basis_factors(original_lp.num_rows, settings.refactor_frequency), - basic_list(original_lp.num_rows), - nonbasic_list(), - node_presolver(leaf_problem, Arow, {}, var_type), - bounds_changed(original_lp.num_cols, false), - rng(settings.random_seed + pcgenerator_t::default_seed + worker_id, - pcgenerator_t::default_stream ^ worker_id) - { - } - - // Set the `start_node` for best-first search. 
- void init_best_first(mip_node_t* node, const lp_problem_t& original_lp) - { - start_node = node; - start_lower = original_lp.lower; - start_upper = original_lp.upper; - search_strategy = BEST_FIRST; - lower_bound = node->lower_bound; - is_active = true; - } - - // Initialize the worker for diving, setting the `start_node`, `start_lower` and - // `start_upper`. Returns `true` if the starting node is feasible via - // bounds propagation. - bool init_diving(mip_node_t* node, - search_strategy_t type, - const lp_problem_t& original_lp, - const simplex_solver_settings_t& settings) - { - internal_node = node->detach_copy(); - start_node = &internal_node; - - start_lower = original_lp.lower; - start_upper = original_lp.upper; - search_strategy = type; - lower_bound = node->lower_bound; - is_active = true; - - std::fill(bounds_changed.begin(), bounds_changed.end(), false); - node->get_variable_bounds(start_lower, start_upper, bounds_changed); - return node_presolver.bounds_strengthening(settings, bounds_changed, start_lower, start_upper); - } - - // Set the variables bounds for the LP relaxation of the current node. - bool set_lp_variable_bounds(mip_node_t* node_ptr, - const simplex_solver_settings_t& settings) - { - // Reset the bound_changed markers - std::fill(bounds_changed.begin(), bounds_changed.end(), false); - - // Set the correct bounds for the leaf problem - if (recompute_bounds) { - leaf_problem.lower = start_lower; - leaf_problem.upper = start_upper; - node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); - - } else { - node_ptr->update_branched_variable_bounds( - leaf_problem.lower, leaf_problem.upper, bounds_changed); - } - - return node_presolver.bounds_strengthening( - settings, bounds_changed, leaf_problem.lower, leaf_problem.upper); - } - - private: - // For diving, we need to store the full node instead of - // of just a pointer, since it is not stored in the tree anymore. 
- // To keep the same interface across all worker types, - // this will be used as a temporary storage and - // will be pointed by `start_node`. - // For exploration, this will not be used. - mip_node_t internal_node; -}; - -template -class branch_and_bound_worker_pool_t { - public: - void init(i_t num_workers, - const lp_problem_t& original_lp, - const csr_matrix_t& Arow, - const std::vector& var_type, - const simplex_solver_settings_t& settings) - { - workers_.resize(num_workers); - num_idle_workers_ = num_workers; - for (i_t i = 0; i < num_workers; ++i) { - workers_[i] = std::make_unique>( - i, original_lp, Arow, var_type, settings); - idle_workers_.push_front(i); - } - - is_initialized = true; - } - - // Here, we are assuming that the scheduler is the only - // thread that can retrieve/pop an idle worker. - branch_and_bound_worker_t* get_idle_worker() - { - std::lock_guard lock(mutex_); - if (idle_workers_.empty()) { - return nullptr; - } else { - i_t idx = idle_workers_.front(); - return workers_[idx].get(); - } - } - - // Here, we are assuming that the scheduler is the only - // thread that can retrieve/pop an idle worker. 
- void pop_idle_worker() - { - std::lock_guard lock(mutex_); - if (!idle_workers_.empty()) { - idle_workers_.pop_front(); - num_idle_workers_--; - } - } - - void return_worker_to_pool(branch_and_bound_worker_t* worker) - { - worker->is_active = false; - std::lock_guard lock(mutex_); - idle_workers_.push_back(worker->worker_id); - num_idle_workers_++; - } - - f_t get_lower_bound() - { - f_t lower_bound = std::numeric_limits::infinity(); - - if (is_initialized) { - for (i_t i = 0; i < workers_.size(); ++i) { - if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { - lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); - } - } - } - - return lower_bound; - } - - i_t num_idle_workers() { return num_idle_workers_; } - - private: - // Worker pool - std::vector>> workers_; - bool is_initialized = false; - - omp_mutex_t mutex_; - std::deque idle_workers_; - omp_atomic_t num_idle_workers_; -}; - -template -std::vector get_search_strategies( - diving_heuristics_settings_t settings) -{ - std::vector types; - types.reserve(num_search_strategies); - types.push_back(BEST_FIRST); - if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); } - if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); } - if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); } - if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); } - return types; -} - -template -std::array get_max_workers( - i_t num_workers, const std::vector& strategies) -{ - std::array max_num_workers; - max_num_workers.fill(0); - - i_t bfs_workers = std::max(strategies.size() == 1 ? 
num_workers : num_workers / 4, 1); - max_num_workers[BEST_FIRST] = bfs_workers; - - i_t diving_workers = (num_workers - bfs_workers); - i_t m = strategies.size() - 1; - - for (size_t i = 1, k = 0; i < strategies.size(); ++i) { - i_t start = (double)k * diving_workers / m; - i_t end = (double)(k + 1) * diving_workers / m; - max_num_workers[strategies[i]] = end - start; - ++k; - } - - return max_num_workers; -} - -} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/constants.hpp b/cpp/src/branch_and_bound/constants.hpp new file mode 100644 index 0000000000..4693e6789a --- /dev/null +++ b/cpp/src/branch_and_bound/constants.hpp @@ -0,0 +1,35 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once +#include + +namespace cuopt::linear_programming::dual_simplex { + +constexpr int num_search_strategies = 5; + +// Indicate the search and variable selection algorithms used by each thread +// in B&B (See [1]). +// +// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, +// Berlin, 2007. doi: 10.14279/depositonce-1634. +enum search_strategy_t : int { + BEST_FIRST = 0, // Best-First + Plunging. + PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) + LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) + GUIDED_DIVING = 3, // Guided diving (9.2.3). 
+ COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) +}; + +constexpr std::array search_strategies = { + BEST_FIRST, PSEUDOCOST_DIVING, LINE_SEARCH_DIVING, GUIDED_DIVING, COEFFICIENT_DIVING}; + +enum class branch_direction_t { NONE = -1, DOWN = 0, UP = 1 }; + +enum class branch_and_bound_mode_t { PARALLEL = 0, DETERMINISTIC = 1 }; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/deterministic_workers.hpp b/cpp/src/branch_and_bound/deterministic_workers.hpp index 7a074051c6..a63dad1bcc 100644 --- a/cpp/src/branch_and_bound/deterministic_workers.hpp +++ b/cpp/src/branch_and_bound/deterministic_workers.hpp @@ -8,9 +8,9 @@ #pragma once #include -#include #include #include +#include #include @@ -58,7 +58,7 @@ struct deterministic_snapshot_t { f_t upper_bound; pseudo_cost_snapshot_t pc_snapshot; std::vector incumbent; - i_t total_lp_iters; + int64_t total_lp_iters; }; template @@ -74,7 +74,7 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t { // Diving-specific snapshots (ignored by BFS workers) std::vector incumbent_snapshot; - i_t total_lp_iters_snapshot{0}; + int64_t total_lp_iters_snapshot{0}; std::vector> integer_solutions; int next_solution_seq{0}; @@ -90,7 +90,9 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t { const std::vector& var_types, const simplex_solver_settings_t& settings, const std::string& context_name) - : base_t(id, original_lp, Arow, var_types, settings), work_context(context_name) + : base_t(id, original_lp, Arow, var_types, settings), + work_context(context_name), + pc_snapshot(1, settings) { work_context.deterministic = true; } @@ -156,7 +158,7 @@ class deterministic_bfs_worker_t mip_node_t* enqueue_children_for_plunge(mip_node_t* down_child, mip_node_t* up_child, - rounding_direction_t preferred_direction) + branch_direction_t preferred_direction) { if (!plunge_stack.empty()) { backlog.push(plunge_stack.back()); @@ -169,7 +171,7 @@ class 
deterministic_bfs_worker_t up_child->creation_seq = next_creation_seq++; mip_node_t* first_child; - if (preferred_direction == rounding_direction_t::UP) { + if (preferred_direction == branch_direction_t::UP) { plunge_stack.push_front(down_child); plunge_stack.push_front(up_child); first_child = up_child; @@ -193,8 +195,8 @@ class deterministic_bfs_worker_t plunge_stack.pop_front(); return node; } - auto node_opt = backlog.pop(); - return node_opt.has_value() ? node_opt.value() : nullptr; + + return !backlog.empty() ? backlog.pop() : nullptr; } size_t queue_size() const @@ -342,22 +344,6 @@ class deterministic_diving_worker_t {objective, solution, depth, this->worker_id, this->next_solution_seq++}); ++this->total_integer_solutions; } - - branch_variable_t variable_selection_from_snapshot(const std::vector& fractional, - const std::vector& solution) const - { - assert(root_solution != nullptr); - return this->pc_snapshot.pseudocost_diving(fractional, solution, *root_solution); - } - - branch_variable_t guided_variable_selection(const std::vector& fractional, - const std::vector& solution) const - { - if (this->incumbent_snapshot.empty()) { - return variable_selection_from_snapshot(fractional, solution); - } - return this->pc_snapshot.guided_diving(fractional, solution, this->incumbent_snapshot); - } }; template diff --git a/cpp/src/branch_and_bound/diving_heuristics.cpp b/cpp/src/branch_and_bound/diving_heuristics.cpp index f9791280a6..4989e48c6a 100644 --- a/cpp/src/branch_and_bound/diving_heuristics.cpp +++ b/cpp/src/branch_and_bound/diving_heuristics.cpp @@ -7,8 +7,6 @@ #include -#include - namespace cuopt::linear_programming::dual_simplex { template @@ -17,26 +15,26 @@ branch_variable_t line_search_diving(const std::vector& fractional, const std::vector& root_solution, logger_t& log) { - constexpr f_t eps = 1e-6; - i_t branch_var = -1; - f_t min_score = std::numeric_limits::max(); - rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps 
= 1e-6; + i_t branch_var = -1; + f_t min_score = std::numeric_limits::max(); + branch_direction_t round_dir = branch_direction_t::NONE; for (i_t j : fractional) { - f_t score = inf; - rounding_direction_t dir = rounding_direction_t::NONE; + f_t score = inf; + branch_direction_t dir = branch_direction_t::NONE; if (solution[j] < root_solution[j] - eps) { f_t f = solution[j] - std::floor(solution[j]); f_t d = root_solution[j] - solution[j]; score = f / d; - dir = rounding_direction_t::DOWN; + dir = branch_direction_t::DOWN; } else if (solution[j] > root_solution[j] + eps) { f_t f = std::ceil(solution[j]) - solution[j]; f_t d = solution[j] - root_solution[j]; score = f / d; - dir = rounding_direction_t::UP; + dir = branch_direction_t::UP; } if (min_score > score) { @@ -48,12 +46,12 @@ branch_variable_t line_search_diving(const std::vector& fractional, // If the current solution is equal to the root solution, arbitrarily // set the branch variable to the first fractional variable and round it down - if (round_dir == rounding_direction_t::NONE) { + if (round_dir == branch_direction_t::NONE) { branch_var = fractional[0]; - round_dir = rounding_direction_t::DOWN; + round_dir = branch_direction_t::DOWN; } - assert(round_dir != rounding_direction_t::NONE); + assert(round_dir != branch_direction_t::NONE); assert(branch_var >= 0); log.debug("Line search diving: selected var %d with val = %e, round dir = %d and score = %e\n", @@ -72,14 +70,63 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const std::vector& root_solution, logger_t& log) { - return pseudocost_diving_from_arrays(pc.pseudo_cost_sum_down.data(), - pc.pseudo_cost_sum_up.data(), - pc.pseudo_cost_num_down.data(), - pc.pseudo_cost_num_up.data(), - (i_t)pc.pseudo_cost_sum_down.size(), - fractional, - solution, - root_solution); + const i_t num_fractional = fractional.size(); + if (num_fractional == 0) return {-1, branch_direction_t::NONE}; + + f_t avg_down = pc.compute_pseudocost_average_down(); + f_t avg_up 
= pc.compute_pseudocost_average_up(); + + i_t branch_var = fractional[0]; + f_t max_score = std::numeric_limits::lowest(); + branch_direction_t round_dir = branch_direction_t::DOWN; + constexpr f_t eps = f_t(1e-6); + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t pc_down = pc.get_pseudocost_down(j, avg_down); + f_t pc_up = pc.get_pseudocost_up(j, avg_up); + f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); + f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); + + f_t score = 0; + branch_direction_t dir = branch_direction_t::DOWN; + + f_t root_val = root_solution[j]; + + if (solution[j] < root_val - f_t(0.4)) { + score = score_down; + dir = branch_direction_t::DOWN; + } else if (solution[j] > root_val + f_t(0.4)) { + score = score_up; + dir = branch_direction_t::UP; + } else if (f_down < f_t(0.3)) { + score = score_down; + dir = branch_direction_t::DOWN; + } else if (f_down > f_t(0.7)) { + score = score_up; + dir = branch_direction_t::UP; + } else if (pc_down < pc_up + eps) { + score = score_down; + dir = branch_direction_t::DOWN; + } else { + score = score_up; + dir = branch_direction_t::UP; + } + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + if (round_dir == branch_direction_t::NONE) { + branch_var = fractional[0]; + round_dir = branch_direction_t::DOWN; + } + + return {branch_var, round_dir}; } template @@ -89,14 +136,39 @@ branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& incumbent, logger_t& log) { - return guided_diving_from_arrays(pc.pseudo_cost_sum_down.data(), - pc.pseudo_cost_sum_up.data(), - pc.pseudo_cost_num_down.data(), - pc.pseudo_cost_num_up.data(), - (i_t)pc.pseudo_cost_sum_down.size(), - fractional, - solution, - incumbent); + const i_t num_fractional = fractional.size(); + if (num_fractional == 0) return {-1, branch_direction_t::NONE}; + + f_t avg_down = 
pc.compute_pseudocost_average_down(); + f_t avg_up = pc.compute_pseudocost_average_up(); + + i_t branch_var = fractional[0]; + f_t max_score = std::numeric_limits::lowest(); + branch_direction_t round_dir = branch_direction_t::DOWN; + constexpr f_t eps = f_t(1e-6); + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); + f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); + branch_direction_t dir = + down_dist < up_dist + eps ? branch_direction_t::DOWN : branch_direction_t::UP; + + f_t pc_down = pc.get_pseudocost_down(j, avg_down); + f_t pc_up = pc.get_pseudocost_up(j, avg_up); + f_t score1 = dir == branch_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; + f_t score2 = dir == branch_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; + f_t score = (score1 + score2) / 6; + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + return {branch_var, round_dir}; } template @@ -130,10 +202,10 @@ branch_variable_t coefficient_diving(const lp_problem_t& lp_probl const std::vector& down_locks, logger_t& log) { - i_t branch_var = -1; - i_t min_locks = std::numeric_limits::max(); - rounding_direction_t round_dir = rounding_direction_t::NONE; - constexpr f_t eps = 1e-6; + i_t branch_var = -1; + i_t min_locks = std::numeric_limits::max(); + branch_direction_t round_dir = branch_direction_t::NONE; + constexpr f_t eps = 1e-6; for (i_t j : fractional) { f_t f_down = solution[j] - std::floor(solution[j]); @@ -151,18 +223,18 @@ branch_variable_t coefficient_diving(const lp_problem_t& lp_probl branch_var = j; if (up_lock < down_lock) { - round_dir = rounding_direction_t::UP; + round_dir = branch_direction_t::UP; } else if (up_lock > down_lock) { - round_dir = rounding_direction_t::DOWN; + round_dir = branch_direction_t::DOWN; } else if (f_down < f_up + eps) { - round_dir = 
rounding_direction_t::DOWN; + round_dir = branch_direction_t::DOWN; } else { - round_dir = rounding_direction_t::UP; + round_dir = branch_direction_t::UP; } } } - assert(round_dir != rounding_direction_t::NONE); + assert(round_dir != branch_direction_t::NONE); assert(branch_var >= 0); log.debug( diff --git a/cpp/src/branch_and_bound/mip_node.cpp b/cpp/src/branch_and_bound/mip_node.cpp deleted file mode 100644 index 7b0f644f4e..0000000000 --- a/cpp/src/branch_and_bound/mip_node.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* clang-format off */ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ -/* clang-format on */ - -#include - -namespace cuopt::linear_programming::dual_simplex { - -bool inactive_status(node_status_t status) -{ - return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE || - status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL); -} - -} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp index a24f67c3bc..c2c2a328a4 100644 --- a/cpp/src/branch_and_bound/mip_node.hpp +++ b/cpp/src/branch_and_bound/mip_node.hpp @@ -7,6 +7,8 @@ #pragma once +#include + #include #include @@ -29,31 +31,15 @@ enum class node_status_t : int { NUMERICAL = 5 // Encountered numerical issue when solving the LP relaxation }; -enum class rounding_direction_t : int8_t { NONE = -1, DOWN = 0, UP = 1 }; - -bool inactive_status(node_status_t status); +inline bool inactive_status(node_status_t status) +{ + return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE || + status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL); +} template class mip_node_t { public: - ~mip_node_t() - { - // Iterative teardown to avoid stack overflow on deep trees. 
- // Detach all descendants breadth-first, then destroy them as leaves. - std::vector> nodes; - for (auto& c : children) { - if (c) { nodes.push_back(std::move(c)); } - } - // nodes.size() grows so that this loop only terminates when only leaves remain - for (size_t i = 0; i < nodes.size(); ++i) { - for (auto& c : nodes[i]->children) { - if (c) { nodes.push_back(std::move(c)); } - } - } - - // scope-exit ensure destruction of all detached leaves - } - mip_node_t(mip_node_t&&) = default; mip_node_t& operator=(mip_node_t&&) = default; @@ -64,12 +50,12 @@ class mip_node_t { parent(nullptr), node_id(0), branch_var(-1), - branch_dir(rounding_direction_t::NONE), + branch_dir(branch_direction_t::NONE), branch_var_lower(-std::numeric_limits::infinity()), branch_var_upper(std::numeric_limits::infinity()), fractional_val(std::numeric_limits::infinity()), objective_estimate(std::numeric_limits::infinity()), - vstatus(0) + packed_vstatus(0) { children[0] = nullptr; children[1] = nullptr; @@ -82,10 +68,10 @@ class mip_node_t { parent(nullptr), node_id(0), branch_var(-1), - branch_dir(rounding_direction_t::NONE), + branch_dir(branch_direction_t::NONE), integer_infeasible(-1), objective_estimate(std::numeric_limits::infinity()), - vstatus(basis) + packed_vstatus(compress_vstatus(basis)) { children[0] = nullptr; children[1] = nullptr; @@ -95,7 +81,7 @@ class mip_node_t { mip_node_t* parent_node, i_t node_num, i_t branch_variable, - rounding_direction_t branch_direction, + branch_direction_t branch_direction, f_t branch_var_value, i_t integer_inf, const std::vector& basis) @@ -109,16 +95,32 @@ class mip_node_t { fractional_val(branch_var_value), integer_infeasible(integer_inf), objective_estimate(parent_node->objective_estimate), - vstatus(basis) + packed_vstatus(compress_vstatus(basis)) { - branch_var_lower = branch_direction == rounding_direction_t::DOWN ? 
problem.lower[branch_var] - : std::ceil(branch_var_value); - branch_var_upper = branch_direction == rounding_direction_t::DOWN ? std::floor(branch_var_value) - : problem.upper[branch_var]; + branch_var_lower = branch_direction == branch_direction_t::DOWN ? problem.lower[branch_var] + : std::ceil(branch_var_value); + branch_var_upper = branch_direction == branch_direction_t::DOWN ? std::floor(branch_var_value) + : problem.upper[branch_var]; children[0] = nullptr; children[1] = nullptr; } + ~mip_node_t() + { + // Iterative teardown to avoid stack overflow on deep trees. + // Detach all descendants breadth-first, then destroy them as leaves. + std::vector> nodes; + for (auto& c : children) { + if (c) { nodes.push_back(std::move(c)); } + } + // nodes.size() grows so that this loop only terminates when only leaves remain + for (size_t i = 0; i < nodes.size(); ++i) { + for (auto& c : nodes[i]->children) { + if (c) { nodes.push_back(std::move(c)); } + } + } + } + void get_variable_bounds(std::vector& lower, std::vector& upper, std::vector& bounds_changed) const @@ -164,7 +166,7 @@ class mip_node_t { children[0] = std::move(down_child); children[1] = std::move(up_child); // When we add children we no longer need to store our basis - vstatus.clear(); + packed_vstatus = {}; } bool is_inactive() const @@ -252,7 +254,7 @@ class mip_node_t { // This method creates a copy of the current node // with its parent set to `nullptr` // This detaches the node from the tree. 
- mip_node_t detach_copy() const + mip_node_t detach_copy() const { mip_node_t copy; copy.lower_bound = lower_bound; @@ -260,7 +262,7 @@ class mip_node_t { copy.depth = depth; copy.node_id = node_id; copy.integer_infeasible = integer_infeasible; - copy.vstatus = vstatus; + copy.packed_vstatus = packed_vstatus; copy.branch_var = branch_var; copy.branch_dir = branch_dir; copy.branch_var_lower = branch_var_lower; @@ -282,7 +284,7 @@ class mip_node_t { i_t depth; i_t node_id; i_t branch_var; - rounding_direction_t branch_dir; + branch_direction_t branch_dir; f_t branch_var_lower; f_t branch_var_upper; f_t fractional_val; @@ -291,7 +293,11 @@ class mip_node_t { mip_node_t* parent; std::unique_ptr children[2]; - std::vector vstatus; + std::vector packed_vstatus; + + // Indicate if we can dive from this node or not. This is set to false when + // this node was already selected for diving once. + omp_atomic_t can_dive{true}; // Worker-local identification for deterministic ordering: // - origin_worker_id: which worker created this node @@ -312,7 +318,7 @@ class mip_node_t { const mip_node_t* node = this; while (node != nullptr && node->branch_var >= 0) { uint64_t step = static_cast(node->branch_var) << 1; - step |= (node->branch_dir == rounding_direction_t::UP) ? 1 : 0; + step |= (node->branch_dir == branch_direction_t::UP) ? 
1 : 0; path_steps.push_back(step); node = node->parent; } @@ -359,7 +365,7 @@ class search_tree_t { parent_node, ++id, branch_var, - rounding_direction_t::DOWN, + branch_direction_t::DOWN, fractional_val, integer_infeasible, parent_vstatus); @@ -367,14 +373,14 @@ class search_tree_t { parent_node, down_child.get(), branch_var, - rounding_direction_t::DOWN, + branch_direction_t::DOWN, std::floor(fractional_val)); auto up_child = std::make_unique>(original_lp, parent_node, ++id, branch_var, - rounding_direction_t::UP, + branch_direction_t::UP, fractional_val, integer_infeasible, parent_vstatus); @@ -383,7 +389,7 @@ class search_tree_t { parent_node, up_child.get(), branch_var, - rounding_direction_t::UP, + branch_direction_t::UP, std::ceil(fractional_val)); assert(parent_vstatus.size() == original_lp.num_cols); @@ -405,7 +411,7 @@ class search_tree_t { const mip_node_t* origin_ptr, const mip_node_t* dest_ptr, const i_t branch_var, - rounding_direction_t branch_dir, + branch_direction_t branch_dir, const f_t bound) { if (write_graphviz) { @@ -413,7 +419,7 @@ class search_tree_t { origin_ptr->node_id, dest_ptr->node_id, branch_var, - branch_dir == rounding_direction_t::DOWN ? "<=" : ">=", + branch_dir == branch_direction_t::DOWN ? 
"<=" : ">=", bound); } } diff --git a/cpp/src/branch_and_bound/node_queue.hpp b/cpp/src/branch_and_bound/node_queue.hpp index 09d030c96e..15c50c99b7 100644 --- a/cpp/src/branch_and_bound/node_queue.hpp +++ b/cpp/src/branch_and_bound/node_queue.hpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -29,12 +28,14 @@ class heap_t { { buffer.push_back(node); std::push_heap(buffer.begin(), buffer.end(), comp); + ++num_entries_; } void push(T&& node) { buffer.push_back(std::move(node)); std::push_heap(buffer.begin(), buffer.end(), comp); + ++num_entries_; } template @@ -42,39 +43,99 @@ class heap_t { { buffer.emplace_back(std::forward(args)...); std::push_heap(buffer.begin(), buffer.end(), comp); + ++num_entries_; + assert(num_entries_.load() == buffer.size()); } - std::optional pop() + T pop() { - if (buffer.empty()) return std::nullopt; - std::pop_heap(buffer.begin(), buffer.end(), comp); T node = std::move(buffer.back()); buffer.pop_back(); + --num_entries_; + assert(num_entries_.load() == buffer.size()); return node; } - size_t size() const { return buffer.size(); } + size_t size() const { return num_entries_; } T& top() { return buffer.front(); } - void clear() { buffer.clear(); } - bool empty() const { return buffer.empty(); } + + void clear() + { + buffer.clear(); + num_entries_ = 0; + } + + bool empty() const { return num_entries_ == 0; } // Read-only access to underlying buffer for iteration without modification const std::vector& data() const { return buffer; } private: std::vector buffer; + omp_atomic_t num_entries_{0}; Comp comp; }; -// A queue storing the nodes waiting to be explored/dived from. +// A queue storing the nodes waiting to be explored. Before calling pop or push in parallel, +// the mutex NEEDS to be acquired via the `lock()` method. It must the released afterwards with +// `unlock()`. 
template class node_queue_t { + public: + void push(mip_node_t* new_node) + { + assert(new_node != nullptr); + auto entry = std::make_shared(new_node); + best_first_heap_.push(entry); + if (new_node->can_dive) diving_heap_.push(entry); + lower_bound_ = best_first_heap_.top()->lower_bound; + } + + mip_node_t* pop_best_first() + { + if (best_first_heap_.empty()) { return nullptr; } + auto entry = best_first_heap_.pop(); + lower_bound_ = best_first_heap_.empty() ? std::numeric_limits::infinity() + : best_first_heap_.top()->lower_bound; + mip_node_t* node = std::exchange(entry->node, nullptr); + assert(node != nullptr); + return node; + } + + mip_node_t* pop_diving() + { + while (!diving_heap_.empty()) { + auto entry = diving_heap_.pop(); + if (entry->node != nullptr) { + entry->node->can_dive = false; + return entry->node; + } + } + return nullptr; + } + + void lock() { mutex_.lock(); } + void unlock() { mutex_.unlock(); } + + mip_node_t* bfs_top() + { + return best_first_heap_.empty() ? nullptr : best_first_heap_.top()->node; + } + + i_t diving_queue_size() { return diving_heap_.size(); } + i_t best_first_queue_size() { return best_first_heap_.size(); } + + f_t get_lower_bound() + { + return best_first_heap_.empty() ? 
std::numeric_limits::infinity() : lower_bound_.load(); + } + private: struct heap_entry_t { mip_node_t* node = nullptr; - f_t lower_bound = -inf; - f_t score = inf; + f_t lower_bound = -std::numeric_limits::infinity(); + f_t score = std::numeric_limits::infinity(); heap_entry_t(mip_node_t* new_node) : node(new_node), lower_bound(new_node->lower_bound), score(new_node->objective_estimate) @@ -102,67 +163,11 @@ class node_queue_t { } }; - heap_t, lower_bound_comp> best_first_heap; - heap_t, score_comp> diving_heap; - omp_mutex_t mutex; - - public: - void push(mip_node_t* new_node) - { - std::lock_guard lock(mutex); - auto entry = std::make_shared(new_node); - best_first_heap.push(entry); - diving_heap.push(entry); - } - - std::optional*> pop_best_first() - { - std::lock_guard lock(mutex); - auto entry = best_first_heap.pop(); - - if (entry.has_value()) { return std::exchange(entry.value()->node, nullptr); } - - return std::nullopt; - } - - std::optional*> pop_diving() - { - std::lock_guard lock(mutex); - - while (!diving_heap.empty()) { - auto entry = diving_heap.pop(); - - if (entry.has_value()) { - if (auto node_ptr = entry.value()->node; node_ptr != nullptr) { return node_ptr; } - } - } + heap_t, lower_bound_comp> best_first_heap_; + heap_t, score_comp> diving_heap_; + omp_mutex_t mutex_; - return std::nullopt; - } - - i_t diving_queue_size() - { - std::lock_guard lock(mutex); - return diving_heap.size(); - } - - i_t best_first_queue_size() - { - std::lock_guard lock(mutex); - return best_first_heap.size(); - } - - f_t get_lower_bound() - { - std::lock_guard lock(mutex); - return best_first_heap.empty() ? inf : best_first_heap.top()->lower_bound; - } - - mip_node_t* bfs_top() - { - std::lock_guard lock(mutex); - return best_first_heap.empty() ? 
nullptr : best_first_heap.top()->node; - } + omp_atomic_t lower_bound_{std::numeric_limits::infinity()}; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index c38e98e27d..219f60db0d 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -24,7 +24,6 @@ #include namespace cuopt::linear_programming::dual_simplex { - namespace { static bool is_dual_simplex_done(dual::status_t status) @@ -218,8 +217,10 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& nonbasic_list, const std::vector& fractional, + const csc_matrix_t& AT, basis_update_mpf_t& basis_factors, - pseudo_costs_t& pc) + std::vector& strong_branch_down, + std::vector& strong_branch_up) { i_t m = lp.num_rows; i_t n = lp.num_cols; @@ -246,7 +247,7 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t& lp, objective_change_estimate_t estimate = single_pivot_objective_change_estimate(lp, settings, - pc.AT, + AT, vstatus, j, basic_map[j], @@ -258,8 +259,8 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t& lp, workspace, delta_z, work_estimate); - pc.strong_branch_down[k] = estimate.down_obj_change; - pc.strong_branch_up[k] = estimate.up_obj_change; + strong_branch_down[k] = estimate.down_obj_change; + strong_branch_up[k] = estimate.up_obj_change; } } @@ -298,12 +299,14 @@ void strong_branch_helper(i_t start, f_t root_obj, f_t upper_bound, i_t iter_limit, - pseudo_costs_t& pc, + std::vector& strong_branch_down, + std::vector& strong_branch_up, std::vector& dual_simplex_obj_down, std::vector& dual_simplex_obj_up, std::vector& dual_simplex_status_down, std::vector& dual_simplex_status_up, - shared_strong_branching_context_view_t& sb_view) + shared_strong_branching_context_view_t& sb_view, + omp_atomic_t& num_strong_branches_completed) { raft::common::nvtx::range 
scope("BB::strong_branch_helper"); lp_problem_t child_problem = original_lp; @@ -380,7 +383,7 @@ void strong_branch_helper(i_t start, } if (branch == 0) { - pc.strong_branch_down[k] = std::max(obj - root_obj, 0.0); + strong_branch_down[k] = std::max(obj - root_obj, 0.0); dual_simplex_obj_down[k] = std::max(obj - root_obj, 0.0); dual_simplex_status_down[k] = status; if (verbose) { @@ -393,7 +396,7 @@ void strong_branch_helper(i_t start, toc(start_time)); } } else { - pc.strong_branch_up[k] = std::max(obj - root_obj, 0.0); + strong_branch_up[k] = std::max(obj - root_obj, 0.0); dual_simplex_obj_up[k] = std::max(obj - root_obj, 0.0); dual_simplex_status_up[k] = status; if (verbose) { @@ -431,7 +434,7 @@ void strong_branch_helper(i_t start, } if (toc(start_time) > settings.time_limit) { break; } - const i_t completed = pc.num_strong_branches_completed++; + const i_t completed = num_strong_branches_completed++; if (thread_id == 0 && toc(last_log) > 10) { last_log = tic(); @@ -463,7 +466,7 @@ std::pair trial_branching(const lp_problem_t& ori f_t upper_bound, f_t start_time, i_t iter_limit, - omp_atomic_t& total_lp_iter) + i_t& iter) { lp_problem_t child_problem = original_lp; child_problem.lower[branch_var] = branch_var_lower; @@ -479,7 +482,7 @@ std::pair trial_branching(const lp_problem_t& ori objective_upper_bound(child_problem, upper_bound, child_settings.dual_tol); lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); - i_t iter = 0; + iter = 0; std::vector child_vstatus = vstatus; std::vector child_edge_norms = edge_norms; std::vector child_basic_list = basic_list; @@ -502,7 +505,7 @@ std::pair trial_branching(const lp_problem_t& ori solution, iter, child_edge_norms); - total_lp_iter += iter; + settings.log.debug("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %s. 
Obj %e\n", branch_var, child_problem.lower[branch_var], @@ -732,9 +735,9 @@ static void batch_pdlp_strong_branching_task( std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); if (warm_start_remaining_time <= 0.0) { return; } - assert(!pc.pdlp_warm_cache.populated && "PDLP warm cache should not be populated at this point"); + assert(!pc.pdlp_warm_cache->populated && "PDLP warm cache should not be populated at this point"); - if (!pc.pdlp_warm_cache.populated) { + if (!pc.pdlp_warm_cache->populated) { pdlp_solver_settings_t ws_settings; ws_settings.method = method_t::PDLP; ws_settings.presolver = presolver_t::None; @@ -756,14 +759,14 @@ static void batch_pdlp_strong_branching_task( ws_settings.inside_mip = true; if (effective_batch_pdlp == 1) { ws_settings.concurrent_halt = &concurrent_halt; } - auto start_time = std::chrono::high_resolution_clock::now(); + auto lp_start_time = std::chrono::high_resolution_clock::now(); - auto ws_solution = solve_lp(&pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, ws_settings); + auto ws_solution = solve_lp(&pc.pdlp_warm_cache->batch_pdlp_handle, mps_model, ws_settings); if (verbose) { auto end_time = std::chrono::high_resolution_clock::now(); auto duration = - std::chrono::duration_cast(end_time - start_time).count(); + std::chrono::duration_cast(end_time - lp_start_time).count(); settings.log.printf( "Original problem solved in %d milliseconds" " and iterations: %d\n", @@ -777,21 +780,21 @@ static void batch_pdlp_strong_branching_task( const auto& ws_dual = ws_solution.get_dual_solution(); // Need to use the pc steam since the batch pdlp handle will get destroyed after the warm // start - cache.initial_primal = rmm::device_uvector(ws_primal, ws_primal.stream()); - cache.initial_dual = rmm::device_uvector(ws_dual, ws_dual.stream()); - cache.step_size = ws_solution.get_pdlp_warm_start_data().initial_step_size_; - cache.primal_weight = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_; - 
cache.pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_; - cache.populated = true; + cache->initial_primal = rmm::device_uvector(ws_primal, ws_primal.stream()); + cache->initial_dual = rmm::device_uvector(ws_dual, ws_dual.stream()); + cache->step_size = ws_solution.get_pdlp_warm_start_data().initial_step_size_; + cache->primal_weight = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_; + cache->pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_; + cache->populated = true; if (verbose) { settings.log.printf( "Cached PDLP warm start: primal=%zu dual=%zu step_size=%e primal_weight=%e iters=%d\n", - cache.initial_primal.size(), - cache.initial_dual.size(), - cache.step_size, - cache.primal_weight, - cache.pdlp_iteration); + cache->initial_primal.size(), + cache->initial_dual.size(), + cache->step_size, + cache->primal_weight, + cache->pdlp_iteration); } } else { if (verbose) { @@ -817,22 +820,23 @@ static void batch_pdlp_strong_branching_task( if (batch_remaining_time <= 0.0) { return; } pdlp_settings.time_limit = batch_remaining_time; - if (pc.pdlp_warm_cache.populated) { + if (pc.pdlp_warm_cache->populated) { auto& cache = pc.pdlp_warm_cache; - pdlp_settings.set_initial_primal_solution(cache.initial_primal.data(), - cache.initial_primal.size(), - cache.batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_dual_solution( - cache.initial_dual.data(), cache.initial_dual.size(), cache.batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_step_size(cache.step_size); - pdlp_settings.set_initial_primal_weight(cache.primal_weight); - pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + pdlp_settings.set_initial_primal_solution(cache->initial_primal.data(), + cache->initial_primal.size(), + cache->batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution(cache->initial_dual.data(), + cache->initial_dual.size(), + cache->batch_pdlp_handle.get_stream()); + 
pdlp_settings.set_initial_step_size(cache->step_size); + pdlp_settings.set_initial_primal_weight(cache->primal_weight); + pdlp_settings.set_initial_pdlp_iteration(cache->pdlp_iteration); } if (concurrent_halt.load() == 1) { return; } const auto solutions = batch_pdlp_solve( - &pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); + &pc.pdlp_warm_cache->batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings); f_t batch_pdlp_strong_branching_time = toc(start_batch); // Fail safe in case the batch PDLP failed and produced no solutions @@ -888,13 +892,13 @@ static void batch_pdlp_reliability_branching_task( const std::vector& candidate_vars, const simplex_solver_settings_t& settings, shared_strong_branching_context_view_t& sb_view, - batch_pdlp_warm_cache_t& pdlp_warm_cache, + batch_pdlp_warm_cache_t* pdlp_warm_cache, std::vector& pdlp_obj_down, std::vector& pdlp_obj_up) { - log.printf(rb_mode == 2 ? "RB batch PDLP only for %d candidates\n" - : "RB cooperative batch PDLP and DS for %d candidates\n", - num_candidates); + log.debug(rb_mode == 2 ? 
"RB batch PDLP only for %d candidates\n" + : "RB cooperative batch PDLP and DS for %d candidates\n", + num_candidates); f_t start_batch = tic(); @@ -935,15 +939,16 @@ static void batch_pdlp_reliability_branching_task( } pdlp_settings.time_limit = batch_remaining_time; - if (pdlp_warm_cache.populated) { - auto& cache = pdlp_warm_cache; - pdlp_settings.set_initial_primal_solution( - cache.initial_primal.data(), cache.initial_primal.size(), batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_dual_solution( - cache.initial_dual.data(), cache.initial_dual.size(), batch_pdlp_handle.get_stream()); - pdlp_settings.set_initial_step_size(cache.step_size); - pdlp_settings.set_initial_primal_weight(cache.primal_weight); - pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration); + if (pdlp_warm_cache->populated) { + pdlp_settings.set_initial_primal_solution(pdlp_warm_cache->initial_primal.data(), + pdlp_warm_cache->initial_primal.size(), + batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_dual_solution(pdlp_warm_cache->initial_dual.data(), + pdlp_warm_cache->initial_dual.size(), + batch_pdlp_handle.get_stream()); + pdlp_settings.set_initial_step_size(pdlp_warm_cache->step_size); + pdlp_settings.set_initial_primal_weight(pdlp_warm_cache->primal_weight); + pdlp_settings.set_initial_pdlp_iteration(pdlp_warm_cache->pdlp_iteration); } if (concurrent_halt.load() == 1) { return; } @@ -955,7 +960,7 @@ static void batch_pdlp_reliability_branching_task( if (solutions.get_additional_termination_informations().size() != static_cast(num_candidates) * 2) { - log.printf("RB batch PDLP failed and produced no solutions\n"); + log.debug("RB batch PDLP failed and produced no solutions\n"); return; } @@ -966,10 +971,10 @@ static void batch_pdlp_reliability_branching_task( } } - log.printf("RB batch PDLP completed in %.2fs. Solved %d/%d\n", - batch_pdlp_time, - amount_done, - num_candidates * 2); + log.debug("RB batch PDLP completed in %.2fs. 
Solved %d/%d\n", + batch_pdlp_time, + amount_done, + num_candidates * 2); for (i_t k = 0; k < num_candidates; k++) { if (solutions.get_termination_status(k) == pdlp_termination_status_t::Optimal) { @@ -1002,9 +1007,9 @@ void strong_branching(const lp_problem_t& original_lp, constexpr bool verbose = false; pc.resize(original_lp.num_cols); - pc.strong_branch_down.assign(fractional.size(), 0); - pc.strong_branch_up.assign(fractional.size(), 0); - pc.num_strong_branches_completed = 0; + std::vector strong_branch_down(fractional.size(), std::numeric_limits::quiet_NaN()); + std::vector strong_branch_up(fractional.size(), std::numeric_limits::quiet_NaN()); + omp_atomic_t num_strong_branches_completed = 0; const f_t elapsed_time = toc(start_time); if (elapsed_time > settings.time_limit) { return; } @@ -1049,8 +1054,10 @@ void strong_branching(const lp_problem_t& original_lp, basic_list, nonbasic_list, fractional, + *pc.AT, basis_factors, - pc); + strong_branch_down, + strong_branch_up); } else { #pragma omp parallel num_threads(settings.num_threads) { @@ -1082,7 +1089,6 @@ void strong_branching(const lp_problem_t& original_lp, i_t start = std::floor(k * fractional.size() / n); i_t end = std::floor((k + 1) * fractional.size() / n); - constexpr bool verbose = false; if (verbose) { settings.log.printf("Thread id %d task id %d start %d end %d. 
size %d\n", omp_get_thread_num(), @@ -1105,12 +1111,14 @@ void strong_branching(const lp_problem_t& original_lp, root_obj, upper_bound, simplex_iteration_limit, - pc, + strong_branch_down, + strong_branch_up, dual_simplex_obj_down, dual_simplex_obj_up, dual_simplex_status_down, dual_simplex_status_up, - sb_view); + sb_view, + num_strong_branches_completed); } // DS done: signal PDLP to stop (time-limit or all work done) and wait if (effective_batch_pdlp == 1) { concurrent_halt.store(1); } @@ -1178,7 +1186,7 @@ void strong_branching(const lp_problem_t& original_lp, for (i_t k = 0; k < fractional.size(); k++) { for (i_t branch = 0; branch < 2; branch++) { const bool is_down = (branch == 0); - f_t& sb_dest = is_down ? pc.strong_branch_down[k] : pc.strong_branch_up[k]; + f_t& sb_dest = is_down ? strong_branch_down[k] : strong_branch_up[k]; f_t ds_obj = is_down ? dual_simplex_obj_down[k] : dual_simplex_obj_up[k]; dual::status_t ds_status = is_down ? dual_simplex_status_down[k] : dual_simplex_status_up[k]; @@ -1211,12 +1219,12 @@ void strong_branching(const lp_problem_t& original_lp, } } - pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root = + pc.pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root = (f_t(merged_from_pdlp) / f_t(fractional.size() * 2)) * 100.0; if (verbose) { settings.log.printf( "Batch PDLP for strong branching. 
Percent solved by batch PDLP at root: %f\n", - pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root); + pc.pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root); settings.log.printf( "Merged results: %d from DS, %d from PDLP, %d unresolved (NaN), %d solved by both\n", merged_from_ds, @@ -1226,22 +1234,57 @@ void strong_branching(const lp_problem_t& original_lp, } } - pc.update_pseudo_costs_from_strong_branching(fractional, root_solution.x); + pc.update_pseudo_costs_from_strong_branching( + fractional, strong_branch_down, strong_branch_up, root_solution.x); +} + +template +inline f_t pseudo_costs_t::compute_pseudocost_average_down() +{ + i_t num_initialized = 0; + f_t avg = 0.0; + + for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) { + i_t num = pseudo_cost_num_down[j]; + f_t sum = pseudo_cost_sum_down[j]; + if (num > 0 && std::isfinite(sum)) { + ++num_initialized; + avg += sum / num; + } + } + + return (num_initialized > 0) ? avg / num_initialized : 1.0; +} + +template +inline f_t pseudo_costs_t::compute_pseudocost_average_up() +{ + i_t num_initialized = 0; + f_t avg = 0.0; + + for (size_t j = 0; j < pseudo_cost_sum_up.size(); ++j) { + i_t num = pseudo_cost_num_up[j]; + f_t sum = pseudo_cost_sum_up[j]; + if (num > 0 && std::isfinite(sum)) { + ++num_initialized; + avg += sum / num; + } + } + + return (num_initialized > 0) ? avg / num_initialized : 1.0; } template f_t pseudo_costs_t::calculate_pseudocost_score(i_t j, const std::vector& solution, - f_t pseudo_cost_up_avg, - f_t pseudo_cost_down_avg) const + f_t avg_down, + f_t avg_up) const { constexpr f_t eps = 1e-6; - i_t num_up = pseudo_cost_num_up[j]; - i_t num_down = pseudo_cost_num_down[j]; - f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; - f_t pc_down = num_down > 0 ? 
pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; f_t f_down = solution[j] - std::floor(solution[j]); f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t pc_down = get_pseudocost_down(j, avg_down); + f_t pc_up = get_pseudocost_up(j, avg_up); return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); } @@ -1250,11 +1293,11 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt f_t leaf_objective) { const f_t change_in_obj = std::max(leaf_objective - node_ptr->lower_bound, 0.0); - const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN + const f_t frac = node_ptr->branch_dir == branch_direction_t::DOWN ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) : std::ceil(node_ptr->fractional_val) - node_ptr->fractional_val; - if (node_ptr->branch_dir == rounding_direction_t::DOWN) { + if (node_ptr->branch_dir == branch_direction_t::DOWN) { pseudo_cost_sum_down[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_down[node_ptr->branch_var]++; } else { @@ -1263,43 +1306,19 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt } } -template -void pseudo_costs_t::initialized(i_t& num_initialized_down, - i_t& num_initialized_up, - f_t& pseudo_cost_down_avg, - f_t& pseudo_cost_up_avg) const -{ - auto avgs = compute_pseudo_cost_averages(pseudo_cost_sum_down.data(), - pseudo_cost_sum_up.data(), - pseudo_cost_num_down.data(), - pseudo_cost_num_up.data(), - pseudo_cost_sum_down.size()); - pseudo_cost_down_avg = avgs.down_avg; - pseudo_cost_up_avg = avgs.up_avg; -} - template i_t pseudo_costs_t::variable_selection(const std::vector& fractional, - const std::vector& solution, - logger_t& log) + const std::vector& solution) { i_t branch_var = fractional[0]; f_t max_score = -1; - i_t num_initialized_down; - i_t num_initialized_up; - f_t pseudo_cost_down_avg; - f_t pseudo_cost_up_avg; + f_t avg_down = compute_pseudocost_average_down(); + f_t avg_up = compute_pseudocost_average_up(); - 
initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); - - log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); + settings.log.debug("PC: avg down %e up %e\n", avg_down, avg_up); for (i_t j : fractional) { - f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = calculate_pseudocost_score(j, solution, avg_down, avg_up); if (score > max_score) { max_score = score; @@ -1307,10 +1326,10 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio } } - log.debug("Pseudocost branching on %d. Value %e. Score %e.\n", - branch_var, - solution[branch_var], - max_score); + settings.log.debug("Pseudocost branching on %d. Value %e. Score %e.\n", + branch_var, + solution[branch_var], + max_score); return branch_var; } @@ -1322,19 +1341,17 @@ i_t pseudo_costs_t::reliable_variable_selection( branch_and_bound_worker_t* worker, const std::vector& var_types, const branch_and_bound_stats_t& bnb_stats, - const simplex_solver_settings_t& settings, f_t upper_bound, int max_num_tasks, - logger_t& log, const std::vector& new_slacks, const lp_problem_t& original_lp) { - constexpr f_t eps = 1e-6; - f_t start_time = bnb_stats.start_time; - i_t branch_var = fractional[0]; - f_t max_score = -1; - f_t pseudo_cost_down_avg = -1; - f_t pseudo_cost_up_avg = -1; + constexpr f_t eps = 1e-6; + f_t start_time = bnb_stats.start_time; + i_t branch_var = fractional[0]; + f_t max_score = -1; + f_t avg_down{0}; + f_t avg_up{0}; lp_solution_t& leaf_solution = worker->leaf_solution; const int64_t branch_and_bound_lp_iters = bnb_stats.total_lp_iters; @@ -1364,17 +1381,11 @@ i_t pseudo_costs_t::reliable_variable_selection( // If `reliable_threshold == 0`, then we set the uninitialized pseudocosts to the average. 
// Otherwise, the best ones are initialized via strong branching, while the other are ignored. // - // In the latter, we are not using the average pseudocost (which calculated in the `initialized` - // method). + // So we only need to initialize the average for the former. if (reliable_threshold == 0) { - i_t num_initialized_up; - i_t num_initialized_down; - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); - log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); + avg_down = compute_pseudocost_average_down(); + avg_up = compute_pseudocost_average_up(); + settings.log.debug("PC: avg down %e up %e\n", avg_down, avg_up); } std::vector> unreliable_list; @@ -1386,8 +1397,7 @@ i_t pseudo_costs_t::reliable_variable_selection( unreliable_list.push_back(std::make_pair(-1, j)); continue; } - f_t score = - calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = calculate_pseudocost_score(j, leaf_solution.x, avg_down, avg_up); if (score > max_score) { max_score = score; @@ -1396,16 +1406,17 @@ i_t pseudo_costs_t::reliable_variable_selection( } if (unreliable_list.empty()) { - log.printf("pc branching on %d. Value %e. Score %e\n", - branch_var, - leaf_solution.x[branch_var], - max_score); + settings.log.debug("pc branching on %d. Value %e. 
Score %e\n", + branch_var, + leaf_solution.x[branch_var], + max_score); return branch_var; } // 0: no batch PDLP, 1: cooperative batch PDLP and DS, 2: batch PDLP only const i_t rb_mode = settings.mip_batch_pdlp_reliability_branching; + // We don't use batch PDLP in reliability branching if the PDLP warm start data was not filled // This indicates that PDLP alone (not batched) couldn't even run at the root node // So it will most likely perform poorly compared to DS @@ -1414,31 +1425,45 @@ i_t pseudo_costs_t::reliable_variable_selection( // using batch PDLP constexpr i_t min_num_candidates_for_pdlp = 5; constexpr f_t min_percent_solved_by_batch_pdlp_at_root_for_pdlp = 5.0; - // Batch PDLP is either forced or we use the heuristic to decide if it should be used - const bool use_pdlp = (rb_mode == 2) || (rb_mode != 0 && !settings.sub_mip && - !settings.deterministic && pdlp_warm_cache.populated && - unreliable_list.size() > min_num_candidates_for_pdlp && - pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root > - min_percent_solved_by_batch_pdlp_at_root_for_pdlp); - - if (rb_mode != 0 && !pdlp_warm_cache.populated) { - log.printf("PDLP warm start data not populated, using DS only\n"); + + // Check if batch PDLP was forced to be on + bool use_pdlp = rb_mode == 2; + + // Use the heuristic to decide if it should be used (in case it is set to automatic) + if (!use_pdlp && rb_mode != 0) { + // Check if it is a sub MIP or the determinism mode is on. 
+ use_pdlp = !settings.sub_mip; + use_pdlp &= !settings.deterministic; + + // Check if the warm cache was filled at the root + use_pdlp &= pdlp_warm_cache->populated; + + // Check if there are enough candidates for batch PDLP + use_pdlp &= unreliable_list.size() > min_num_candidates_for_pdlp; + + // Check if batch PDLP was effective for strong branching at the root node + use_pdlp &= pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root > + min_percent_solved_by_batch_pdlp_at_root_for_pdlp; + } + + if (rb_mode != 0 && !pdlp_warm_cache->populated) { + settings.log.debug("PDLP warm start data not populated, using DS only\n"); } else if (rb_mode != 0 && settings.sub_mip) { - log.printf("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n"); + settings.log.debug("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n"); } else if (rb_mode != 0 && settings.deterministic) { - log.printf( + settings.log.debug( "Batch PDLP reliability branching is disabled because deterministic mode is enabled\n"); } else if (rb_mode != 0 && unreliable_list.size() < min_num_candidates_for_pdlp) { - log.printf("Not enough candidates to use batch PDLP, using DS only\n"); - } else if (rb_mode != 0 && pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root < 5.0) { - log.printf("Percent solved by batch PDLP at root is too low, using DS only\n"); + settings.log.debug("Not enough candidates to use batch PDLP, using DS only\n"); + } else if (rb_mode != 0 && pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root < 5.0) { + settings.log.debug("Percent solved by batch PDLP at root is too low, using DS only\n"); } else if (use_pdlp) { - log.printf( + settings.log.debug( "Using batch PDLP because populated, unreliable list size is %d (> %d), and percent solved " "by batch PDLP at root is %f%% (> %f%%)\n", static_cast(unreliable_list.size()), min_num_candidates_for_pdlp, - pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root, + 
pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root, min_percent_solved_by_batch_pdlp_at_root_for_pdlp); } @@ -1454,9 +1479,9 @@ i_t pseudo_costs_t::reliable_variable_selection( assert(num_candidates > 0); assert(num_tasks > 0); - log.printf( + settings.log.debug( "RB iters = %d, B&B iters = %d, unreliable = %d, num_tasks = %d, reliable_threshold = %d\n", - strong_branching_lp_iter.load(), + static_cast(strong_branching_lp_iter), branch_and_bound_lp_iters, unreliable_list.size(), num_tasks, @@ -1487,8 +1512,8 @@ i_t pseudo_costs_t::reliable_variable_selection( objective_change_estimate_t estimate = single_pivot_objective_change_estimate(worker->leaf_problem, settings, - AT, - node_ptr->vstatus, + *AT, + worker->leaf_vstatus, j, basic_map[j], leaf_solution, @@ -1503,8 +1528,7 @@ i_t pseudo_costs_t::reliable_variable_selection( score = std::max(estimate.up_obj_change, eps) * std::max(estimate.down_obj_change, eps); } else { // Use the previous score, even if it is unreliable - score = calculate_pseudocost_score( - j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + score = calculate_pseudocost_score(j, leaf_solution.x, avg_down, avg_up); } } } else { @@ -1542,7 +1566,7 @@ i_t pseudo_costs_t::reliable_variable_selection( if (use_pdlp) { #pragma omp task default(shared) - batch_pdlp_reliability_branching_task(log, + batch_pdlp_reliability_branching_task(settings.log, rb_mode, num_candidates, start_time, @@ -1554,13 +1578,13 @@ i_t pseudo_costs_t::reliable_variable_selection( candidate_vars, settings, sb_view, - pdlp_warm_cache, + pdlp_warm_cache.get(), pdlp_obj_down, pdlp_obj_up); } if (toc(start_time) > settings.time_limit) { - log.printf("Time limit reached\n"); + settings.log.debug("Time limit reached\n"); if (use_pdlp) { concurrent_halt.store(1); #pragma omp taskwait @@ -1590,16 +1614,17 @@ i_t pseudo_costs_t::reliable_variable_selection( if (toc(start_time) > settings.time_limit) { continue; } if (rb_mode == 1 && sb_view.is_solved(i)) { - 
log.printf( + settings.log.debug( "DS skipping variable %d branch down (shared_idx %d): already solved by PDLP\n", j, i); } else { pseudo_cost_mutex_down[j].lock(); if (pseudo_cost_num_down[j] < reliable_threshold) { // Do trial branching on the down branch + i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, - node_ptr->vstatus, + worker->leaf_vstatus, worker->leaf_edge_norms, worker->basis_factors, worker->basic_list, @@ -1610,7 +1635,8 @@ i_t pseudo_costs_t::reliable_variable_selection( upper_bound, start_time, iter_limit_per_trial, - strong_branching_lp_iter); + iter); + strong_branching_lp_iter += iter; dual_simplex_obj_down[i] = obj; dual_simplex_status_down[i] = status; @@ -1619,7 +1645,6 @@ i_t pseudo_costs_t::reliable_variable_selection( f_t change_in_x = leaf_solution.x[j] - std::floor(leaf_solution.x[j]); pseudo_cost_sum_down[j] += change_in_obj / change_in_x; pseudo_cost_num_down[j]++; - // Should be valid if were are already here if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(i); } } } else { @@ -1633,16 +1658,18 @@ i_t pseudo_costs_t::reliable_variable_selection( const i_t shared_idx = i + num_candidates; if (rb_mode == 1 && sb_view.is_solved(shared_idx)) { - log.printf("DS skipping variable %d branch up (shared_idx %d): already solved by PDLP\n", - j, - shared_idx); + settings.log.debug( + "DS skipping variable %d branch up (shared_idx %d): already solved by PDLP\n", + j, + shared_idx); } else { pseudo_cost_mutex_up[j].lock(); if (pseudo_cost_num_up[j] < reliable_threshold) { + i_t iter = 0; const auto [obj, status] = trial_branching(worker->leaf_problem, settings, var_types, - node_ptr->vstatus, + worker->leaf_vstatus, worker->leaf_edge_norms, worker->basis_factors, worker->basic_list, @@ -1653,7 +1680,8 @@ i_t pseudo_costs_t::reliable_variable_selection( upper_bound, start_time, iter_limit_per_trial, - strong_branching_lp_iter); + iter); + strong_branching_lp_iter += iter; 
dual_simplex_obj_up[i] = obj; dual_simplex_status_up[i] = status; @@ -1662,7 +1690,6 @@ i_t pseudo_costs_t::reliable_variable_selection( f_t change_in_x = std::ceil(leaf_solution.x[j]) - leaf_solution.x[j]; pseudo_cost_sum_up[j] += change_in_obj / change_in_x; pseudo_cost_num_up[j]++; - // Should be valid if were are already here if (rb_mode == 1 && is_dual_simplex_done(status)) { sb_view.mark_solved(shared_idx); } } } else { @@ -1674,9 +1701,7 @@ i_t pseudo_costs_t::reliable_variable_selection( if (toc(start_time) > settings.time_limit) { continue; } - score = - calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); - + score = calculate_pseudocost_score(j, leaf_solution.x, avg_down, avg_up); score_mutex.lock(); if (score > max_score) { max_score = score; @@ -1690,24 +1715,6 @@ i_t pseudo_costs_t::reliable_variable_selection( f_t dual_simplex_elapsed = toc(dual_simplex_start_time); - // TODO put back - // if (rb_mode != 2) { - // if (rb_mode == 1) { - // log.printf( - // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed, %d skipped - // (PDLP) in %.2fs\n", num_candidates, dual_simplex_optimal.load(), num_candidates * 2, - // dual_simplex_infeasible.load(), num_candidates * 2, - // dual_simplex_failed.load(), num_candidates * 2, - // dual_simplex_skipped.load(), dual_simplex_elapsed); - // } else { - // log.printf( - // "RB Dual Simplex: %d candidates, %d/%d optimal, %d/%d infeasible, %d/%d failed in - // %.2fs\n", num_candidates, dual_simplex_optimal.load(), num_candidates * 2, - // dual_simplex_infeasible.load(), num_candidates * 2, dual_simplex_failed.load(), - // num_candidates * 2, dual_simplex_elapsed); - // } - //} - if (use_pdlp) { #pragma omp taskwait @@ -1756,22 +1763,21 @@ i_t pseudo_costs_t::reliable_variable_selection( } } - f_t score = - calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg); + f_t score = calculate_pseudocost_score(j, leaf_solution.x, 
avg_down, avg_up); if (score > max_score) { max_score = score; branch_var = j; } } - log.printf("RB batch PDLP: %d candidates, %d/%d optimal, %d applied to pseudo-costs\n", - num_candidates, - pdlp_optimal, - num_candidates * 2, - pdlp_applied); + settings.log.debug("RB batch PDLP: %d candidates, %d/%d optimal, %d applied to pseudo-costs\n", + num_candidates, + pdlp_optimal, + num_candidates * 2, + pdlp_applied); } - log.printf( + settings.log.debug( "pc branching on %d. Value %e. Score %e\n", branch_var, leaf_solution.x[branch_var], max_score); return branch_var; @@ -1780,37 +1786,30 @@ i_t pseudo_costs_t::reliable_variable_selection( template f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, const std::vector& solution, - f_t lower_bound, - logger_t& log) + f_t lower_bound) { - const i_t num_fractional = fractional.size(); - f_t estimate = lower_bound; - - i_t num_initialized_down; - i_t num_initialized_up; - f_t pseudo_cost_down_avg; - f_t pseudo_cost_up_avg; - - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + f_t estimate = lower_bound; + f_t avg_down = compute_pseudocost_average_down(); + f_t avg_up = compute_pseudocost_average_up(); for (i_t j : fractional) { - constexpr f_t eps = 1e-6; - i_t num_up = pseudo_cost_num_up[j]; - i_t num_down = pseudo_cost_num_down[j]; - f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; - f_t pc_down = num_down > 0 ? 
pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t pc_down = get_pseudocost_down(j, avg_down); + f_t pc_up = get_pseudocost_up(j, avg_up); + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; estimate += std::min(pc_down * f_down, pc_up * f_up); } - log.printf("pseudocost estimate = %e\n", estimate); + settings.log.debug("pseudocost estimate = %e\n", estimate); return estimate; } template void pseudo_costs_t::update_pseudo_costs_from_strong_branching( - const std::vector& fractional, const std::vector& root_soln) + const std::vector& fractional, + const std::vector& strong_branch_down, + const std::vector& strong_branch_up, + const std::vector& root_soln) { for (i_t k = 0; k < fractional.size(); k++) { const i_t j = fractional[k]; @@ -1835,6 +1834,7 @@ void pseudo_costs_t::update_pseudo_costs_from_strong_branching( #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class pseudo_costs_t; +template class pseudo_cost_snapshot_t; template void strong_branching(const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 009bd8b81a..8139054a7b 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -7,8 +7,9 @@ #pragma once -#include +#include #include +#include #include #include @@ -18,7 +19,6 @@ #include #include -#include #include #include @@ -27,354 +27,6 @@ namespace cuopt::linear_programming::dual_simplex { -template -struct branch_variable_t { - i_t variable; - rounding_direction_t direction; -}; - -template -struct pseudo_cost_update_t { - i_t variable; - rounding_direction_t direction; - f_t delta; - double work_timestamp; - int worker_id; - - bool operator<(const pseudo_cost_update_t& other) const - { - if (work_timestamp != 
other.work_timestamp) return work_timestamp < other.work_timestamp; - if (variable != other.variable) return variable < other.variable; - if (delta != other.delta) return delta < other.delta; - return worker_id < other.worker_id; - } -}; - -template -struct pseudo_cost_averages_t { - f_t down_avg; - f_t up_avg; -}; - -// used to get T from omp_atomic_t based on the fact that omp_atomic_t::operator++ returns T -template -using underlying_type = decltype(std::declval()++); - -// Necessary because omp_atomic_t may be passed instead of f_t -template -auto compute_pseudo_cost_averages(const MaybeWrappedF* pc_sum_down, - const MaybeWrappedF* pc_sum_up, - const MaybeWrappedI* pc_num_down, - const MaybeWrappedI* pc_num_up, - size_t n) -{ - using underlying_f_t = underlying_type; - using underlying_i_t = underlying_type; - - underlying_i_t num_initialized_down = 0; - underlying_i_t num_initialized_up = 0; - underlying_f_t pseudo_cost_down_avg = 0.0; - underlying_f_t pseudo_cost_up_avg = 0.0; - - for (size_t j = 0; j < n; ++j) { - if (pc_num_down[j] > 0) { - ++num_initialized_down; - if (std::isfinite(pc_sum_down[j])) { - pseudo_cost_down_avg += pc_sum_down[j] / pc_num_down[j]; - } - } - if (pc_num_up[j] > 0) { - ++num_initialized_up; - if (std::isfinite(pc_sum_up[j])) { pseudo_cost_up_avg += pc_sum_up[j] / pc_num_up[j]; } - } - } - - pseudo_cost_down_avg = - (num_initialized_down > 0) ? pseudo_cost_down_avg / num_initialized_down : 1.0; - pseudo_cost_up_avg = (num_initialized_up > 0) ? 
pseudo_cost_up_avg / num_initialized_up : 1.0; - - return pseudo_cost_averages_t{pseudo_cost_down_avg, pseudo_cost_up_avg}; -} - -// Variable selection using pseudo-cost product scoring -// Returns the best variable to branch on -template -i_t variable_selection_from_pseudo_costs(const f_t* pc_sum_down, - const f_t* pc_sum_up, - const i_t* pc_num_down, - const i_t* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution) -{ - const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return -1; - - auto [pc_down_avg, pc_up_avg] = - compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t pc_down = pc_num_down[j] != 0 ? pc_sum_down[j] / pc_num_down[j] : pc_down_avg; - f_t pc_up = pc_num_up[j] != 0 ? pc_sum_up[j] / pc_num_up[j] : pc_up_avg; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); - if (score > max_score) { - max_score = score; - branch_var = j; - } - } - - return branch_var; -} - -// Objective estimate using pseudo-costs (lock-free implementation) -// Returns lower_bound + estimated cost to reach integer feasibility -template -f_t obj_estimate_from_arrays(const f_t* pc_sum_down, - const f_t* pc_sum_up, - const i_t* pc_num_down, - const i_t* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution, - f_t lower_bound) -{ - auto [pc_down_avg, pc_up_avg] = - compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - f_t estimate = lower_bound; - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t pc_down = pc_num_down[j] != 0 ? pc_sum_down[j] / pc_num_down[j] : pc_down_avg; - f_t pc_up = pc_num_up[j] != 0 ? 
pc_sum_up[j] / pc_num_up[j] : pc_up_avg; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - estimate += std::min(std::max(pc_down * f_down, eps), std::max(pc_up * f_up, eps)); - } - - return estimate; -} - -template -branch_variable_t pseudocost_diving_from_arrays(const MaybeWrappedF* pc_sum_down, - const MaybeWrappedF* pc_sum_up, - const MaybeWrappedI* pc_num_down, - const MaybeWrappedI* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution, - const std::vector& root_solution) -{ - const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; - - auto avgs = compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - rounding_direction_t round_dir = rounding_direction_t::DOWN; - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t pc_down = pc_num_down[j] != 0 ? (f_t)pc_sum_down[j] / (f_t)pc_num_down[j] : avgs.down_avg; - f_t pc_up = pc_num_up[j] != 0 ? (f_t)pc_sum_up[j] / (f_t)pc_num_up[j] : avgs.up_avg; - - f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); - f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); - - f_t score = 0; - rounding_direction_t dir = rounding_direction_t::DOWN; - - f_t root_val = (j < static_cast(root_solution.size())) ? 
root_solution[j] : solution[j]; - - if (solution[j] < root_val - f_t(0.4)) { - score = score_down; - dir = rounding_direction_t::DOWN; - } else if (solution[j] > root_val + f_t(0.4)) { - score = score_up; - dir = rounding_direction_t::UP; - } else if (f_down < f_t(0.3)) { - score = score_down; - dir = rounding_direction_t::DOWN; - } else if (f_down > f_t(0.7)) { - score = score_up; - dir = rounding_direction_t::UP; - } else if (pc_down < pc_up + eps) { - score = score_down; - dir = rounding_direction_t::DOWN; - } else { - score = score_up; - dir = rounding_direction_t::UP; - } - - if (score > max_score) { - max_score = score; - branch_var = j; - round_dir = dir; - } - } - - if (round_dir == rounding_direction_t::NONE) { - branch_var = fractional[0]; - round_dir = rounding_direction_t::DOWN; - } - - return {branch_var, round_dir}; -} - -template -branch_variable_t guided_diving_from_arrays(const MaybeWrappedF* pc_sum_down, - const MaybeWrappedF* pc_sum_up, - const MaybeWrappedI* pc_num_down, - const MaybeWrappedI* pc_num_up, - i_t n_vars, - const std::vector& fractional, - const std::vector& solution, - const std::vector& incumbent) -{ - const i_t num_fractional = fractional.size(); - if (num_fractional == 0) return {-1, rounding_direction_t::NONE}; - - auto avgs = compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars); - - i_t branch_var = fractional[0]; - f_t max_score = std::numeric_limits::lowest(); - rounding_direction_t round_dir = rounding_direction_t::DOWN; - constexpr f_t eps = f_t(1e-6); - - for (i_t j : fractional) { - f_t f_down = solution[j] - std::floor(solution[j]); - f_t f_up = std::ceil(solution[j]) - solution[j]; - f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); - f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); - rounding_direction_t dir = - down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP; - - f_t pc_down = pc_num_down[j] != 0 ? 
(f_t)pc_sum_down[j] / (f_t)pc_num_down[j] : avgs.down_avg; - f_t pc_up = pc_num_up[j] != 0 ? (f_t)pc_sum_up[j] / (f_t)pc_num_up[j] : avgs.up_avg; - - f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; - f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; - f_t score = (score1 + score2) / 6; - - if (score > max_score) { - max_score = score; - branch_var = j; - round_dir = dir; - } - } - - return {branch_var, round_dir}; -} - -template -class pseudo_cost_snapshot_t { - public: - pseudo_cost_snapshot_t() = default; - - pseudo_cost_snapshot_t(std::vector sum_down, - std::vector sum_up, - std::vector num_down, - std::vector num_up) - : sum_down_(std::move(sum_down)), - sum_up_(std::move(sum_up)), - num_down_(std::move(num_down)), - num_up_(std::move(num_up)) - { - } - - i_t variable_selection(const std::vector& fractional, const std::vector& solution) const - { - return variable_selection_from_pseudo_costs(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution); - } - - f_t obj_estimate(const std::vector& fractional, - const std::vector& solution, - f_t lower_bound) const - { - return obj_estimate_from_arrays(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution, - lower_bound); - } - - branch_variable_t pseudocost_diving(const std::vector& fractional, - const std::vector& solution, - const std::vector& root_solution) const - { - return pseudocost_diving_from_arrays(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution, - root_solution); - } - - branch_variable_t guided_diving(const std::vector& fractional, - const std::vector& solution, - const std::vector& incumbent) const - { - return guided_diving_from_arrays(sum_down_.data(), - sum_up_.data(), - num_down_.data(), - num_up_.data(), - n_vars(), - fractional, - solution, - 
incumbent); - } - - void queue_update( - i_t variable, rounding_direction_t direction, f_t delta, double clock, int worker_id) - { - updates_.push_back({variable, direction, delta, clock, worker_id}); - if (direction == rounding_direction_t::DOWN) { - sum_down_[variable] += delta; - num_down_[variable]++; - } else { - sum_up_[variable] += delta; - num_up_[variable]++; - } - } - - std::vector> take_updates() - { - std::vector> result; - result.swap(updates_); - return result; - } - - i_t n_vars() const { return (i_t)sum_down_.size(); } - - std::vector sum_down_; - std::vector sum_up_; - std::vector num_down_; - std::vector num_up_; - - private: - std::vector> updates_; -}; - template struct reliability_branching_settings_t { // Lower bound for the maximum number of LP iterations for a single trial branching @@ -413,6 +65,12 @@ struct reliability_branching_settings_t { bool rank_candidates_with_dual_pivot = true; }; +template +struct branch_variable_t { + i_t variable; + branch_direction_t direction; +}; + template struct batch_pdlp_warm_cache_t { const raft::handle_t batch_pdlp_handle{}; @@ -425,41 +83,63 @@ struct batch_pdlp_warm_cache_t { bool populated{false}; }; +template +struct pseudo_cost_update_t { + i_t variable; + branch_direction_t direction; + f_t delta; + double work_timestamp; + int worker_id; + + bool operator<(const pseudo_cost_update_t& other) const + { + if (work_timestamp != other.work_timestamp) return work_timestamp < other.work_timestamp; + if (variable != other.variable) return variable < other.variable; + if (delta != other.delta) return delta < other.delta; + return worker_id < other.worker_id; + } +}; + template class pseudo_costs_t { public: - explicit pseudo_costs_t(i_t num_variables) - : pseudo_cost_sum_down(num_variables), + explicit pseudo_costs_t(i_t num_variables, const simplex_solver_settings_t& settings) + : settings(settings), + pseudo_cost_sum_down(num_variables), pseudo_cost_sum_up(num_variables), 
pseudo_cost_num_down(num_variables), pseudo_cost_num_up(num_variables), pseudo_cost_mutex_up(num_variables), pseudo_cost_mutex_down(num_variables), - AT(1, 1, 1) + AT(std::make_shared>(1, 1, 1)), + pdlp_warm_cache(std::make_shared>()) { } - void update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective); + pseudo_costs_t(const pseudo_costs_t& other) : pseudo_costs_t(1, other.settings) + { + *this = other; + } - pseudo_cost_snapshot_t create_snapshot() const + pseudo_costs_t& operator=(const pseudo_costs_t& other) { - const i_t n = (i_t)pseudo_cost_sum_down.size(); - std::vector sd(n), su(n); - std::vector nd(n), nu(n); - for (i_t j = 0; j < n; ++j) { - sd[j] = pseudo_cost_sum_down[j]; - su[j] = pseudo_cost_sum_up[j]; - nd[j] = pseudo_cost_num_down[j]; - nu[j] = pseudo_cost_num_up[j]; + if (this != &other) { + this->AT = other.AT; + this->pdlp_warm_cache = other.pdlp_warm_cache; + this->pseudo_cost_num_down = other.pseudo_cost_num_down; + this->pseudo_cost_num_up = other.pseudo_cost_num_up; + this->pseudo_cost_sum_down = other.pseudo_cost_sum_down; + this->pseudo_cost_sum_up = other.pseudo_cost_sum_up; } - return pseudo_cost_snapshot_t( - std::move(sd), std::move(su), std::move(nd), std::move(nu)); + return *this; } + void update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective); + void merge_updates(const std::vector>& updates) { for (const auto& upd : updates) { - if (upd.direction == rounding_direction_t::DOWN) { + if (upd.direction == branch_direction_t::DOWN) { pseudo_cost_sum_down[upd.variable] += upd.delta; pseudo_cost_num_down[upd.variable]++; } else { @@ -479,33 +159,42 @@ class pseudo_costs_t { pseudo_cost_mutex_down.resize(num_variables); } - void initialized(i_t& num_initialized_down, - i_t& num_initialized_up, - f_t& pseudo_cost_down_avg, - f_t& pseudo_cost_up_avg) const; + f_t get_pseudocost_down(i_t j, f_t avg) const + { + i_t num = pseudo_cost_num_down[j]; + f_t sum = pseudo_cost_sum_down[j]; + return num > 0 ? 
sum / num : avg; + } + + f_t get_pseudocost_up(i_t j, f_t avg) const + { + i_t num = pseudo_cost_num_up[j]; + f_t sum = pseudo_cost_sum_up[j]; + return num > 0 ? sum / num : avg; + } + + f_t compute_pseudocost_average_down(); + f_t compute_pseudocost_average_up(); f_t obj_estimate(const std::vector& fractional, const std::vector& solution, - f_t lower_bound, - logger_t& log); + f_t lower_bound); - i_t variable_selection(const std::vector& fractional, - const std::vector& solution, - logger_t& log); + i_t variable_selection(const std::vector& fractional, const std::vector& solution); i_t reliable_variable_selection(const mip_node_t* node_ptr, const std::vector& fractional, branch_and_bound_worker_t* worker, const std::vector& var_types, const branch_and_bound_stats_t& bnb_stats, - const simplex_solver_settings_t& settings, f_t upper_bound, int max_num_tasks, - logger_t& log, const std::vector& new_slacks, const lp_problem_t& original_lp); void update_pseudo_costs_from_strong_branching(const std::vector& fractional, + const std::vector& strong_branch_down, + const std::vector& strong_branch_up, const std::vector& root_soln); uint32_t compute_state_hash() const @@ -514,31 +203,68 @@ class pseudo_costs_t { detail::compute_hash(pseudo_cost_num_down) ^ detail::compute_hash(pseudo_cost_num_up); } - uint32_t compute_strong_branch_hash() const - { - return detail::compute_hash(strong_branch_down) ^ detail::compute_hash(strong_branch_up); - } - f_t calculate_pseudocost_score(i_t j, const std::vector& solution, - f_t pseudo_cost_up_avg, - f_t pseudo_cost_down_avg) const; + f_t avg_down, + f_t avg_up) const; + + std::shared_ptr> AT; // Transpose of the constraint matrix A + std::shared_ptr> pdlp_warm_cache; reliability_branching_settings_t reliability_branching_settings; + simplex_solver_settings_t settings; - csc_matrix_t AT; // Transpose of the constraint matrix A + protected: std::vector> pseudo_cost_sum_up; std::vector> pseudo_cost_sum_down; std::vector> 
pseudo_cost_num_up; std::vector> pseudo_cost_num_down; - std::vector strong_branch_down; - std::vector strong_branch_up; std::vector pseudo_cost_mutex_up; std::vector pseudo_cost_mutex_down; - omp_atomic_t num_strong_branches_completed = 0; - omp_atomic_t strong_branching_lp_iter = 0; - batch_pdlp_warm_cache_t pdlp_warm_cache; + omp_atomic_t strong_branching_lp_iter = 0; +}; + +template +class pseudo_cost_snapshot_t : public pseudo_costs_t { + public: + using Base = pseudo_costs_t; + using Base::Base; + + pseudo_cost_snapshot_t(const pseudo_costs_t& other) : Base(1, other.settings) + { + Base::operator=(other); + } + + pseudo_cost_snapshot_t operator=(const pseudo_costs_t& other) + { + return Base::operator=(other); + } + + void queue_update( + i_t variable, branch_direction_t direction, f_t delta, double clock, int worker_id) + { + updates_.push_back({variable, direction, delta, clock, worker_id}); + if (direction == branch_direction_t::DOWN) { + this->pseudo_cost_sum_down[variable] += delta; + ++this->pseudo_cost_num_down[variable]; + } else { + this->pseudo_cost_sum_up[variable] += delta; + ++this->pseudo_cost_num_up[variable]; + } + } + + std::vector> take_updates() + { + std::vector> result; + result.swap(updates_); + return result; + } + + i_t n_vars() const { return this->pseudo_cost_sum_down.size(); } + + private: + std::vector> updates_; }; template diff --git a/cpp/src/branch_and_bound/worker.hpp b/cpp/src/branch_and_bound/worker.hpp new file mode 100644 index 0000000000..acb8ccca87 --- /dev/null +++ b/cpp/src/branch_and_bound/worker.hpp @@ -0,0 +1,293 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include + +#include +#include + +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +struct branch_and_bound_stats_t { + f_t start_time = 0.0; + omp_atomic_t total_lp_solve_time = 0.0; + omp_atomic_t nodes_explored = 0; + omp_atomic_t nodes_unexplored = 0; + omp_atomic_t total_lp_iters = 0; + omp_atomic_t nodes_since_last_log = 0; + omp_atomic_t last_log = 0.0; +}; + +template +bool is_search_strategy_enabled(search_strategy_t strategy, + bool has_incumbent, + diving_heuristics_settings_t settings) +{ + switch (strategy) { + case BEST_FIRST: return true; + case PSEUDOCOST_DIVING: return settings.pseudocost_diving != 0; + case LINE_SEARCH_DIVING: return settings.line_search_diving != 0; + case GUIDED_DIVING: return settings.guided_diving != 0 && has_incumbent; + case COEFFICIENT_DIVING: return settings.coefficient_diving != 0; + default: return false; + } +} + +template +class branch_and_bound_worker_t { + public: + using float_type = f_t; + using int_type = i_t; + + i_t worker_id; + omp_atomic_t search_strategy; + omp_atomic_t is_active; + omp_atomic_t lower_bound; + + lp_problem_t leaf_problem; + lp_solution_t leaf_solution; + std::vector leaf_vstatus; + std::vector leaf_edge_norms; + + basis_update_mpf_t basis_factors; + std::vector basic_list; + std::vector nonbasic_list; + + bounds_strengthening_t node_presolver; + std::vector bounds_changed; + + std::vector start_lower; + std::vector start_upper; + + pcgenerator_t rng; + + bool recompute_basis = true; + bool recompute_bounds = true; + + branch_and_bound_worker_t(i_t worker_id, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings, + uint64_t rng_offset = 0) + : worker_id(worker_id), + search_strategy(BEST_FIRST), + is_active(false), + lower_bound(-std::numeric_limits::infinity()), + 
leaf_problem(original_lp), + leaf_solution(original_lp.num_rows, original_lp.num_cols), + leaf_vstatus(original_lp.num_cols), + basis_factors(original_lp.num_rows, settings.refactor_frequency), + basic_list(original_lp.num_rows), + nonbasic_list(), + node_presolver(leaf_problem, Arow, {}, var_type), + bounds_changed(original_lp.num_cols, false), + rng(settings.random_seed + pcgenerator_t::default_seed + rng_offset + worker_id, + pcgenerator_t::default_stream ^ (worker_id + rng_offset)) + { + } + + // Set the variables bounds for the LP relaxation in the current node. + bool set_lp_variable_bounds(mip_node_t* node_ptr, + const simplex_solver_settings_t& settings) + { + // Reset the bound_changed markers + std::fill(bounds_changed.begin(), bounds_changed.end(), false); + + // Set the correct bounds for the leaf problem + if (recompute_bounds) { + leaf_problem.lower = start_lower; + leaf_problem.upper = start_upper; + node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); + + } else { + node_ptr->update_branched_variable_bounds( + leaf_problem.lower, leaf_problem.upper, bounds_changed); + } + + return node_presolver.bounds_strengthening( + settings, bounds_changed, leaf_problem.lower, leaf_problem.upper); + } + + void set_active() { is_active = true; } +}; + +template +class bfs_worker_t : public branch_and_bound_worker_t { + public: + using Base = branch_and_bound_worker_t; + bfs_worker_t(i_t worker_id, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings, + uint64_t rng_offset = 0) + : Base(worker_id, original_lp, Arow, var_type, settings, rng_offset) + { + this->start_lower = original_lp.lower; + this->start_upper = original_lp.upper; + this->search_strategy = BEST_FIRST; + + max_diving_workers.fill(0); + active_diving_workers.fill(0); + total_active_diving_workers = 0; + } + + void init(const std::vector*>& nodes) + { + 
assert(!this->is_active.load()); + assert(node_queue.best_first_queue_size() == 0); + assert(nodes.size() > 0); + + for (auto* node : nodes) { + assert(node != nullptr); + node_queue.push(node); + } + + this->lower_bound = node_queue.get_lower_bound(); + } + + void set_inactive() { this->is_active = false; } + + // Steal nodes from another worker + bool steal_node_from(bfs_worker_t* other, i_t num_nodes) + { + bool steal = false; + assert(num_nodes > 0); + + if (!other->is_active || this == other || + other->node_queue.best_first_queue_size() < 2 * num_nodes) { + return steal; + } + + while (num_nodes > 0) { + other->node_queue.lock(); + mip_node_t* node = other->node_queue.best_first_queue_size() > num_nodes + ? other->node_queue.pop_best_first() + : nullptr; + other->node_queue.unlock(); + if (node == nullptr) { break; } + + this->node_queue.lock(); + this->node_queue.push(node); + this->node_queue.unlock(); + --num_nodes; + steal = true; + } + + return steal; + } + + // Calculate the number of diving workers that this worker can launch. Having a fixed number + // of workers allows the solver to be more deterministic. 
+ void calculate_num_diving_workers(i_t num_bfs_workers, + i_t total_diving_workers, + bool has_incumbent, + const diving_heuristics_settings_t& settings) + { + i_t num_active = 0; + for (i_t i = 1; i < num_search_strategies; ++i) { + num_active += is_search_strategy_enabled(search_strategies[i], has_incumbent, settings); + } + + total_max_diving_workers = 0; + max_diving_workers.fill(0); + if (num_active == 0) { return; } + + for (size_t i = 1, k = 0; i < num_search_strategies; ++i) { + if (is_search_strategy_enabled(search_strategies[i], has_incumbent, settings)) { + // Calculate the number of workers for a given diving heuristic + i_t start = std::floor((double)k * total_diving_workers / num_active); + i_t end = std::floor((double)(k + 1) * total_diving_workers / num_active); + i_t workers_per_type = end - start; + + // Calculate the number of diving workers allocated to this (best-first) worker + start = std::floor((double)this->worker_id * workers_per_type / num_bfs_workers); + end = std::floor((double)(this->worker_id + 1) * workers_per_type / num_bfs_workers); + max_diving_workers[i] = end - start; + total_max_diving_workers += max_diving_workers[i]; + ++k; + } + } + } + + // The worker-local node heap. + node_queue_t node_queue; + + // The number of diving workers of each type that this (best-first) worker can launch. + std::array max_diving_workers; + + // The number of active diving workers of each type associated with this (best-first) worker. 
+ std::array, num_search_strategies> active_diving_workers; + + // Keep track of the total number of active diving workers that are associated with this + // (best-first) worker + omp_atomic_t total_active_diving_workers{0}; + + // The maximum number of diving workers that are associated with this + // (best-first) worker + i_t total_max_diving_workers{0}; +}; + +template +class diving_worker_t : public branch_and_bound_worker_t { + public: + using Base = branch_and_bound_worker_t; + using Base::Base; + + // After calling this routine, you need to set `is_active = true` when the worker is ready. + // Note that the starting node may be dropped if it becomes infeasible via bound propagation. + void init(const mip_node_t* node, const lp_problem_t& original_lp) + { + // Creates a copy of the node that is disconnected from the main tree, such that the + // diving does not modify the main tree. We need to store the variable bounds + // associated with this node, since we cannot retrieve them from the tree + start_node = node->detach_copy(); + this->start_lower = original_lp.lower; + this->start_upper = original_lp.upper; + this->lower_bound = node->lower_bound; + std::fill(this->bounds_changed.begin(), this->bounds_changed.end(), false); + node->get_variable_bounds(this->start_lower, this->start_upper, this->bounds_changed); + } + + // Apply bound strengthening to the starting variable bounds + bool presolve_start_bounds(const simplex_solver_settings_t& settings) + { + return this->node_presolver.bounds_strengthening( + settings, this->bounds_changed, this->start_lower, this->start_upper); + } + + // Set this node inactive + void set_inactive() + { + assert(this->is_active.load()); + assert(bfs_worker != nullptr); + assert(bfs_worker->active_diving_workers[this->search_strategy].load() > 0); + assert(bfs_worker->total_active_diving_workers.load() > 0); + + this->is_active = false; + --bfs_worker->active_diving_workers[this->search_strategy]; + 
--bfs_worker->total_active_diving_workers; + } + + mip_node_t start_node; + + // The best-first worker that is associated with this diving worker. Used for controlling the + // number of active diving workers. + bfs_worker_t* bfs_worker{nullptr}; +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp new file mode 100644 index 0000000000..9396f48c04 --- /dev/null +++ b/cpp/src/branch_and_bound/worker_pool.hpp @@ -0,0 +1,117 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +class worker_pool_t { + public: + using i_t = typename WorkerType::int_type; + using f_t = typename WorkerType::float_type; + + void init(i_t num_workers, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings, + const uint64_t rng_offset = 0) + { + assert(!is_initialized); + assert(num_workers > 0); + + workers_.resize(num_workers); + num_idle_workers_ = num_workers; + idle_workers_.clear_resize(num_workers); + for (i_t i = 0; i < num_workers; ++i) { + workers_[i] = + std::make_unique(i, original_lp, Arow, var_type, settings, rng_offset); + idle_workers_.push_back(i); + } + + is_initialized = true; + } + + WorkerType* pop_idle_worker() + { + std::lock_guard lock(mutex_); + if (idle_workers_.empty()) { + return nullptr; + } else { + i_t idx = idle_workers_.front(); + idle_workers_.pop_front(); + num_idle_workers_--; + assert(idle_workers_.size() == static_cast(num_idle_workers_.load())); + assert(idx >= 0 && static_cast(idx) < workers_.size()); + return workers_[idx].get(); + } + } + + void return_worker_to_pool(WorkerType* worker) + { + std::lock_guard 
lock(mutex_); + assert(worker != nullptr); + assert(workers_[worker->worker_id].get() == worker); + assert(!worker->is_active.load()); + assert(static_cast(num_idle_workers_.load()) == idle_workers_.size()); + assert(idle_workers_.size() <= workers_.size()); + + idle_workers_.push_back(worker->worker_id); + num_idle_workers_++; + } + + f_t get_lower_bound() + { + f_t lower_bound = std::numeric_limits::infinity(); + + if (is_initialized) { + for (i_t i = 0; i < workers_.size(); ++i) { + if (workers_[i]->is_active.load()) { + lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); + } + } + } + + return lower_bound; + } + + WorkerType* operator[](i_t id) + { + assert(id >= 0 && static_cast(id) < workers_.size()); + assert(workers_[id] != nullptr); + return workers_[id].get(); + } + WorkerType* operator[](i_t id) const + { + assert(id >= 0 && static_cast(id) < workers_.size()); + assert(workers_[id] != nullptr); + return workers_[id].get(); + } + + i_t num_idle_workers() const { return num_idle_workers_; } + i_t num_workers() const { return workers_.size(); } + + private: + std::vector> workers_; + bool is_initialized = false; + + omp_mutex_t mutex_; + circular_deque_t idle_workers_; + omp_atomic_t num_idle_workers_{0}; +}; + +template +using bfs_worker_pool_t = worker_pool_t>; + +template +using diving_worker_pool_t = worker_pool_t>; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/initial_basis.cpp b/cpp/src/dual_simplex/initial_basis.cpp index d69bf8877e..dcb1615359 100644 --- a/cpp/src/dual_simplex/initial_basis.cpp +++ b/cpp/src/dual_simplex/initial_basis.cpp @@ -18,6 +18,90 @@ namespace cuopt::linear_programming::dual_simplex { +uint8_t encode(variable_status_t vstatus) +{ + assert(vstatus != variable_status_t::SUPERBASIC && + "packed_variable_status_t does not support superbasic variables"); + + uint8_t val = 0; + switch (vstatus) { + case variable_status_t::BASIC: val = 0b00; break; + case 
variable_status_t::NONBASIC_LOWER: val = 0b01; break; + case variable_status_t::NONBASIC_UPPER: val = 0b10; break; + case variable_status_t::NONBASIC_FIXED: val = 0b01; break; + default: val = 0b11; + } + + return val; +} + +variable_status_t decode(uint8_t val) +{ + val &= 0b11; + if (val == 0b00) return variable_status_t::BASIC; + if (val == 0b01) return variable_status_t::NONBASIC_LOWER; + if (val == 0b10) return variable_status_t::NONBASIC_UPPER; + return variable_status_t::NONBASIC_FREE; +} + +std::vector compress_vstatus(const std::vector& vstatus) +{ + size_t n = vstatus.size() / 4; + size_t has_tail = (vstatus.size() % 4 > 0); + + std::vector packed_vstatus; + packed_vstatus.resize(n + has_tail); + + for (size_t i = 0; i < n; ++i) { + size_t j = i * 4; + packed_vstatus[i] = 0; + packed_vstatus[i] |= encode(vstatus[j]); + packed_vstatus[i] |= encode(vstatus[j + 1]) << 2; + packed_vstatus[i] |= encode(vstatus[j + 2]) << 4; + packed_vstatus[i] |= encode(vstatus[j + 3]) << 6; + } + + if (has_tail) { + size_t j = n * 4; + packed_vstatus[n] = 0; + packed_vstatus[n] |= encode(vstatus[j]); + if (j + 1 < vstatus.size()) packed_vstatus[n] |= encode(vstatus[j + 1]) << 2; + if (j + 2 < vstatus.size()) packed_vstatus[n] |= encode(vstatus[j + 2]) << 4; + if (j + 3 < vstatus.size()) packed_vstatus[n] |= encode(vstatus[j + 3]) << 6; + } + + return packed_vstatus; +} + +std::vector decompress_vstatus(const std::vector& packed_vstatus, + size_t vstatus_size) +{ + size_t n = vstatus_size / 4; + size_t has_tail = (vstatus_size % 4 > 0); + + std::vector vstatus; + vstatus.resize(vstatus_size); + assert(packed_vstatus.size() == n + has_tail); + + for (size_t i = 0; i < n; ++i) { + size_t j = i * 4; + vstatus[j] = decode(packed_vstatus[i]); + vstatus[j + 1] = decode(packed_vstatus[i] >> 2); + vstatus[j + 2] = decode(packed_vstatus[i] >> 4); + vstatus[j + 3] = decode(packed_vstatus[i] >> 6); + } + + if (has_tail) { + size_t j = n * 4; + vstatus[j] = decode(packed_vstatus[n]); + if 
(j + 1 < vstatus.size()) vstatus[j + 1] = decode(packed_vstatus[n] >> 2); + if (j + 2 < vstatus.size()) vstatus[j + 2] = decode(packed_vstatus[n] >> 4); + if (j + 3 < vstatus.size()) vstatus[j + 3] = decode(packed_vstatus[n] >> 6); + } + + return vstatus; +} + template i_t initial_basis_selection(const lp_problem_t& problem, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/initial_basis.hpp b/cpp/src/dual_simplex/initial_basis.hpp index 646785fbd2..22a172dbe0 100644 --- a/cpp/src/dual_simplex/initial_basis.hpp +++ b/cpp/src/dual_simplex/initial_basis.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -23,6 +23,10 @@ enum class variable_status_t : int8_t { SUPERBASIC = 4 }; +std::vector compress_vstatus(const std::vector& vstatus); +std::vector decompress_vstatus(const std::vector& packed_vstatus, + size_t vstatus_size); + template i_t initial_basis_selection(const lp_problem_t& problem, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index cfc120e477..0f35a7d479 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -110,6 +110,9 @@ struct simplex_solver_settings_t { mip_batch_pdlp_reliability_branching(0), strong_branching_simplex_iteration_limit(-1), random_seed(0), + bnb_steal_chance(-1), + bnb_nodes_per_steal(10), + bnb_max_steal_attempts(3), reliability_branching(-1), inside_mip(0), sub_mip(0), @@ -198,13 +201,21 @@ struct simplex_solver_settings_t { // PDLP only // Set the maximum number of simplex iterations allowed per trial branch when applying // strong branching to the root node. 
- // -1 - Automatic (iteration limit = 200) - // 0, 1 - Estimate the objective change using a single pivot of dual simplex - // >1 - Set as the iteration limit in dual simplex + // -1 - automatic (iteration limit = 200) + // 0, 1 - estimate the objective change using a single pivot of dual simplex + // >1 - set as the iteration limit in dual simplex i_t strong_branching_simplex_iteration_limit; diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics + // In B&B, indicate the chance in which a worker can steal a node from another worker. + // -1 - automatic (0.05) + // 0 - disable + // >0 - set the stealing chance [0, 1] + f_t bnb_steal_chance; + i_t bnb_nodes_per_steal; + i_t bnb_max_steal_attempts; + // Settings for the reliability branching. // - -1: automatic // - 0: disable (use pseudocost branching instead) diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index ce6b602fba..f4d1735e18 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -356,6 +356,21 @@ solution_t mip_solver_t::run_solver() ? 
2 : context.settings.reduced_cost_strengthening; + char* steal_chance_str = std::getenv("CUOPT_BNB_STEAL_CHANCE"); + if (steal_chance_str != nullptr) { + branch_and_bound_settings.bnb_steal_chance = atof(steal_chance_str); + } + + char* max_steal_attempts_str = std::getenv("CUOPT_BNB_MAX_STEAL_ATTEMPTS"); + if (max_steal_attempts_str != nullptr) { + branch_and_bound_settings.bnb_max_steal_attempts = atoi(max_steal_attempts_str); + } + + char* nodes_per_steal_str = std::getenv("CUOPT_BNB_NODES_PER_STEAL"); + if (nodes_per_steal_str != nullptr) { + branch_and_bound_settings.bnb_nodes_per_steal = atoi(nodes_per_steal_str); + } + if (context.settings.num_cpu_threads < 0) { branch_and_bound_settings.num_threads = std::max(1, omp_get_max_threads() - 1); } else { diff --git a/cpp/src/utilities/circular_deque.hpp b/cpp/src/utilities/circular_deque.hpp new file mode 100644 index 0000000000..6e9d8f5b05 --- /dev/null +++ b/cpp/src/utilities/circular_deque.hpp @@ -0,0 +1,114 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include + +namespace cuopt { + +// A fixed-capacity double-ended queue backed by a circular buffer. +// All operations are O(1) with no dynamic allocation after construction. +// +// Preconditions (asserted in debug builds): +// - push_front / push_back : size() < capacity() +// - pop_front / pop_back : !empty() +// - front / back : !empty() +template +class circular_deque_t { + public: + circular_deque_t() : buffer_(1), capacity_(1), head_(0), tail_(0) {} + + // Allocates storage for exactly `capacity` elements up front. 
+ explicit circular_deque_t(size_t capacity) + : buffer_(capacity + 1), // +1 to distinguish full (next(tail)==head) from empty (head==tail) + capacity_(capacity + 1), + head_(0), + tail_(0) + { + assert(capacity > 0); + } + + bool empty() const { return head_ == tail_; } + bool full() const { return next(tail_) == head_; } + + size_t size() const { return (tail_ - head_ + capacity_) % capacity_; } + size_t capacity() const { return capacity_ - 1; } + + void clear_resize(size_t new_capacity) + { + assert(new_capacity > 0); + head_ = 0; + tail_ = 0; + capacity_ = new_capacity + 1; + buffer_.resize(capacity_); + } + + void push_back(T val) + { + assert(!full()); + buffer_[tail_] = std::move(val); + tail_ = next(tail_); + } + + void push_front(T val) + { + assert(!full()); + head_ = prev(head_); + buffer_[head_] = std::move(val); + } + + T pop_front() + { + assert(!empty()); + T val = std::move(buffer_[head_]); + head_ = next(head_); + return val; + } + + T pop_back() + { + assert(!empty()); + tail_ = prev(tail_); + return std::move(buffer_[tail_]); + } + + T& front() + { + assert(!empty()); + return buffer_[head_]; + } + const T& front() const + { + assert(!empty()); + return buffer_[head_]; + } + + T& back() + { + assert(!empty()); + return buffer_[prev(tail_)]; + } + const T& back() const + { + assert(!empty()); + return buffer_[prev(tail_)]; + } + + private: + size_t next(size_t idx) const { return (idx + 1) % capacity_; } + size_t prev(size_t idx) const { return (idx + capacity_ - 1) % capacity_; } + + std::vector buffer_; + size_t capacity_; + size_t head_; + size_t tail_; +}; + +} // namespace cuopt diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index f6e66472dd..97816f3933 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -20,11 +20,9 @@ namespace cuopt { class omp_mutex_t { public: omp_mutex_t() : mutex(new omp_lock_t) { omp_init_lock(mutex.get()); } - - omp_mutex_t(const 
omp_mutex_t&) = delete; - omp_mutex_t(omp_mutex_t&& other) { *this = std::move(other); } + omp_mutex_t(const omp_mutex_t&) = delete; omp_mutex_t& operator=(const omp_mutex_t&) = delete; omp_mutex_t& operator=(omp_mutex_t&& other) @@ -54,6 +52,15 @@ class omp_mutex_t { std::unique_ptr mutex; }; +// Empty class with the same methods as `omp_mutex_t`. This is mainly used for cleanly disabling +// the `omp_mutex_t` via type alias (`lock` and `unlock` are replaced by NOOPs). +class fake_omp_mutex_t { + public: + static void lock() {} + static void unlock() {} + static bool try_lock() { return true; } +}; + // Wrapper for omp atomic operations. See // https://www.openmp.org/spec-html/5.1/openmpsu105.html. template @@ -117,20 +124,17 @@ class omp_atomic_t { T fetch_sub(T inc) { return fetch_add(-inc); } + // Get the underlying value without atomics + T& underlying() { return val; } + T underlying() const { return val; } + private: T val; -#ifndef __NVCC__ friend double fetch_min(omp_atomic_t& atomic_var, double other); friend double fetch_max(omp_atomic_t& atomic_var, double other); -#endif }; -// Atomic CAS are only supported in OpenMP v5.1 -// (gcc 12+ or clang 14+), however, nvcc (or the host compiler) cannot -// parse it correctly yet -#ifndef __NVCC__ - // Free non-template functions are necessary because of a clang 20 bug // when omp atomic compare is used within a templated context. // see https://github.com/llvm/llvm-project/issues/127466 @@ -155,7 +159,6 @@ inline double fetch_max(omp_atomic_t& atomic_var, double other) } return old; } -#endif #endif diff --git a/cpp/src/utilities/pcgenerator.hpp b/cpp/src/utilities/pcgenerator.hpp index 29a865f02f..e83e5f36ad 100644 --- a/cpp/src/utilities/pcgenerator.hpp +++ b/cpp/src/utilities/pcgenerator.hpp @@ -21,12 +21,11 @@ class pcgenerator_t { static constexpr uint64_t default_stream = 0xda3e39cb94b95bdbULL; /** - * @brief ctor. 
Initializes the PCG - * @param rng_state is the generator state used for initializing the generator - * @param subsequence specifies the subsequence to be generated out of 2^64 possible subsequences - * In a parallel setting, like threads of a CUDA kernel, each thread is required to generate a - * unique set of random numbers. This can be achieved by initializing the generator with same - * rng_state for all the threads and diststreamt values for subsequence. + * @brief Initializes the PCG generator. + * @param seed Generator state seed. + * @param subsequence Selects one of 2^64 independent subsequences. Use distinct values per + * thread to guarantee non-overlapping streams in parallel contexts. + * @param offset Number of outputs to skip ahead before the first draw. */ pcgenerator_t(const uint64_t seed = default_seed, const uint64_t subsequence = default_stream, @@ -35,7 +34,12 @@ class pcgenerator_t { set_seed(seed, subsequence, offset); } - // Set the seed, subsequence and offset of the PCG + /** + * @brief Re-seeds the generator. + * @param seed Generator state seed. + * @param subsequence Selects one of 2^64 independent subsequences. + * @param offset Number of outputs to skip ahead before the first draw. + */ void set_seed(uint64_t seed, const uint64_t subsequence = default_stream, uint64_t offset = 0) { state = uint64_t(0); @@ -47,8 +51,12 @@ class pcgenerator_t { skipahead(offset); } - // Based on "Random Number Generation with Arbitrary Strides" F. B. Brown - // Link https://mcnp.lanl.gov/pdf_files/anl-rn-arb-stride.pdf + /** + * @brief Advances the generator state by @p offset steps in O(log offset) time. + * + * Uses the closed-form LCG jump described in "Random Number Generation with Arbitrary Strides" + * (F. B. Brown, https://mcnp.lanl.gov/pdf_files/anl-rn-arb-stride.pdf). 
+ */ void skipahead(uint64_t offset) { uint64_t G = 1; @@ -68,9 +76,7 @@ class pcgenerator_t { } /** - * @defgroup NextRand Generate the next random number - * @brief This code is derived from PCG basic code - * @{ + * @returns the next uniformly distributed 32-bit unsigned integer. */ uint32_t next_u32() { @@ -83,6 +89,9 @@ class pcgenerator_t { return ret; } + /** + * @returns the next uniformly distributed 64-bit unsigned integer. + */ uint64_t next_u64() { uint64_t ret; @@ -93,6 +102,10 @@ class pcgenerator_t { return ret; } + /** + * @returns the next uniformly distributed non-negative 32-bit signed integer in [0, + * INT32_MAX]. + */ int32_t next_i32() { int32_t ret; @@ -102,6 +115,10 @@ class pcgenerator_t { return ret; } + /** + * @returns the next uniformly distributed non-negative 64-bit signed integer in [0, + * INT64_MAX]. + */ int64_t next_i64() { int64_t ret; @@ -111,10 +128,19 @@ class pcgenerator_t { return ret; } - float next_float() { return static_cast((next_u32() >> 8) * 0x1.0p-24); } + /** + * @returns a uniformly distributed float in [0, 1). + */ + float next_float() { return (next_u32() >> 8) * 0x1.0p-24; } - double next_double() { return static_cast((next_u64() >> 11) * 0x1.0p-53); } + /** + * @returns a uniformly distributed double in [0, 1). + */ + double next_double() { return (next_u64() >> 11) * 0x1.0p-53; } + /** + * @returns the next random value of type @p T. + */ template T next() { @@ -130,9 +156,11 @@ class pcgenerator_t { void next(float& ret) { ret = next_float(); } void next(double& ret) { ret = next_double(); } - /// Draws a sample from a uniform distribution. The samples are uniformly distributed over - /// the semi-closed interval `[low, high)`. This routine may have a **slight bias** toward - /// some numbers in the range (scaling by floating-point). + /** + * @brief Draws a sample from a uniform distribution over `[low, high)`. + * + * May have a slight bias toward some values due to floating-point scaling. 
+ */ template T uniform(T low, T high) { @@ -141,7 +169,9 @@ class pcgenerator_t { return low + (val * range); } - // Shuffles the contents of a sequence using the Fisher–Yates algorithm. + /** + * @brief Shuffles @p seq in-place using the Fisher-Yates algorithm. + */ template void shuffle(std::vector& seq) {