diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index e69ff7b9a..35225b042 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -25,6 +26,7 @@ #include #include +#include #include @@ -2224,13 +2226,37 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut f_t last_objective = root_objective_; f_t root_relax_objective = root_objective_; + constexpr bool enable_root_cut_cpufj = true; + std::unique_ptr> root_cut_cpufj_task; + auto root_cut_cpufj_improvement_callback = + [this](f_t obj, const std::vector& assignment, double) { + std::vector user_assignment; + uncrush_primal_solution(original_problem_, original_lp_, assignment, user_assignment); + settings_.log.debug("Root cut CPUFJ found solution with objective %.16e\n", obj); + set_new_solution(user_assignment); + }; + auto stop_root_cut_cpufj = [&]() { + if (!root_cut_cpufj_task) { return; } + detail::stop_fj_cpu_task(*root_cut_cpufj_task); + root_cut_cpufj_task.reset(); + }; + cuopt::scope_guard root_cut_cpufj_guard([&]() { stop_root_cut_cpufj(); }); + + enum class cut_pass_action_t { CONTINUE, BREAK, RETURN }; + struct cut_pass_result_t { + cut_pass_action_t action{cut_pass_action_t::CONTINUE}; + mip_status_t status{mip_status_t::UNSET}; + }; + f_t cut_generation_start_time = tic(); i_t cut_pool_size = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { set_solution_at_root(solution, cut_info); return mip_status_t::OPTIMAL; - } else { + } + + auto do_cut_pass = [&]() -> cut_pass_result_t { #ifdef PRINT_FRACTIONAL_INFO settings_.log.printf( "Found %d fractional variables on cut pass %d\n", num_fractional, cut_pass); @@ -2243,7 +2269,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } #endif - // Generate cuts and add them to the cut pool f_t cut_start_time = tic(); bool problem_feasible = cut_generation.generate_cuts(original_lp_, settings_, @@ -2263,7 +2288,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.heuristic_preemption_callback(); } finish_clique_thread(); - return mip_status_t::INFEASIBLE; + return {cut_pass_action_t::RETURN, mip_status_t::INFEASIBLE}; } f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { @@ -2279,7 +2304,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::vector cut_rhs; std::vector cut_types; i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types); - if (num_cuts == 0) { break; } + if (num_cuts == 0) { return {cut_pass_action_t::BREAK, mip_status_t::UNSET}; } cut_info.record_cut_types(cut_types); #ifdef PRINT_CUT_POOL_TYPES cut_pool.print_cutpool_types(); @@ -2293,7 +2318,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut for (i_t i = 0; i < static_cast(cut_types.size()); ++i) { settings_.log.printf("row %d cut type %d\n", i, cut_types[i]); } - return mip_status_t::NUMERICAL; + return {cut_pass_action_t::RETURN, mip_status_t::NUMERICAL}; } #endif // Check against saved solution @@ -2333,7 +2358,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } if (add_cuts_status != 0) { settings_.log.printf("Failed to add cuts\n"); - return mip_status_t::NUMERICAL; + return {cut_pass_action_t::RETURN, mip_status_t::NUMERICAL}; } if (settings_.reduced_cost_strengthening >= 1 && upper_bound_.load() < last_upper_bound) { @@ -2377,7 +2402,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut #ifdef WRITE_BOUND_STRENGTHENING_INFEASIBLE_MPS original_lp_.write_mps("bound_strengthening_infeasible.mps"); #endif - return mip_status_t::INFEASIBLE; + return {cut_pass_action_t::RETURN, mip_status_t::INFEASIBLE}; } i_t iter = 0; @@ -2405,7 +2430,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (cut_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; set_final_solution(solution, root_objective_); - return solver_status_; + return {cut_pass_action_t::RETURN, solver_status_}; } if (cut_status != dual::status_t::OPTIMAL) { @@ -2430,7 +2455,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut #ifdef WRITE_CUT_INFEASIBLE_MPS original_lp_.write_mps("cut_infeasible.mps"); #endif - return mip_status_t::NUMERICAL; + return {cut_pass_action_t::RETURN, mip_status_t::NUMERICAL}; } } root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); @@ -2475,7 +2500,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (rel_gap < settings_.relative_mip_gap_tol || abs_gap < settings_.absolute_mip_gap_tol) { if (num_fractional == 0) { set_solution_at_root(solution, cut_info); } set_final_solution(solution, root_objective_); - return mip_status_t::OPTIMAL; + return {cut_pass_action_t::RETURN, mip_status_t::OPTIMAL}; } f_t change_in_objective = root_objective_ - last_objective; @@ -2487,9 +2512,47 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut "Change in objective %.16e is less than 1e-3 of root relax objective %.16e\n", change_in_objective, root_relax_objective); - break; + return {cut_pass_action_t::BREAK, mip_status_t::UNSET}; } last_objective = root_objective_; + return {cut_pass_action_t::CONTINUE, mip_status_t::UNSET}; + }; + + cut_pass_result_t cut_pass_result; +#pragma omp parallel num_threads(root_cut_cpufj_task ? 2 : 1) + { +#pragma omp single + { + if (root_cut_cpufj_task) { +#pragma omp task shared(root_cut_cpufj_task) default(none) + detail::run_fj_cpu_task(*root_cut_cpufj_task, + std::numeric_limits::infinity(), + std::numeric_limits::infinity()); + } + + cut_pass_result = do_cut_pass(); + + if (root_cut_cpufj_task) { detail::stop_fj_cpu_task(*root_cut_cpufj_task); } +#pragma omp taskwait + } + } + + if (cut_pass_result.action == cut_pass_action_t::RETURN) { return cut_pass_result.status; } + if (cut_pass_result.action == cut_pass_action_t::BREAK) { break; } + + if (enable_root_cut_cpufj && !settings_.deterministic && settings_.num_threads >= 2 && + cut_pass + 1 < settings_.max_cut_passes) { + f_t root_cut_cpufj_build_start_time = tic(); + root_cut_cpufj_task = + detail::make_fj_cpu_task_from_host_lp(original_lp_, + var_types_, + root_relax_soln_.x, + settings_, + root_cut_cpufj_improvement_callback, + "[RootCut CPUFJ] "); + settings_.log.debug("Root cut CPUFJ problem build time after pass %d: %.6f seconds\n", + cut_pass, + toc(root_cut_cpufj_build_start_time)); } } @@ -2504,6 +2567,24 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut original_lp_.A.col_start[original_lp_.A.n]); } + if (enable_root_cut_cpufj && cut_info.has_cuts()) { + f_t root_cut_cpufj_build_start_time = tic(); + root_cut_cpufj_task = + detail::make_fj_cpu_task_from_host_lp(original_lp_, + var_types_, + root_relax_soln_.x, + settings_, + root_cut_cpufj_improvement_callback, + "[RootCut CPUFJ] "); + settings_.log.debug("Root cut CPUFJ final problem build time: %.6f seconds\n", + toc(root_cut_cpufj_build_start_time)); + f_t fj_time_limit = settings_.deterministic + ? f_t(settings_.time_limit - toc(exploration_stats_.start_time)) + : f_t{1}; + detail::run_fj_cpu_task(*root_cut_cpufj_task, fj_time_limit, 0.5); + root_cut_cpufj_task.reset(); + } + set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); pc_.resize(original_lp_.num_cols); diff --git a/cpp/src/mip_heuristics/feasibility_jump/cpu_fj_thread.cuh b/cpp/src/mip_heuristics/feasibility_jump/cpu_fj_thread.cuh new file mode 100644 index 000000000..9a828ea43 --- /dev/null +++ b/cpp/src/mip_heuristics/feasibility_jump/cpu_fj_thread.cuh @@ -0,0 +1,73 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +template +struct fj_cpu_climber_t; + +template +class fj_t; + +template +struct cpu_fj_thread_t : public cpu_worker_thread_base_t> { + ~cpu_fj_thread_t(); + + void run_worker(); + void on_terminate(); + void on_start(); + bool get_result() { return cpu_fj_solution_found; } + + void stop_cpu_solver(); + + std::atomic cpu_fj_solution_found{false}; + f_t time_limit{+std::numeric_limits::infinity()}; + double work_unit_limit{std::numeric_limits::infinity()}; + std::unique_ptr> fj_cpu; + fj_t* fj_ptr{nullptr}; +}; + +template +struct fj_cpu_task_t { + struct fj_cpu_deleter_t { + void operator()(fj_cpu_climber_t* ptr) const; + }; + std::atomic preemption_flag{false}; + std::unique_ptr, fj_cpu_deleter_t> fj_cpu; +}; + +template +std::unique_ptr> make_fj_cpu_task_from_host_lp( + const dual_simplex::lp_problem_t& problem, + const std::vector& variable_types, + const std::vector& seed_assignment, + const dual_simplex::simplex_solver_settings_t& settings, + std::function&, double)> improvement_callback, + std::string log_prefix); + +template +void run_fj_cpu_task(fj_cpu_task_t& task, + f_t time_limit = std::numeric_limits::infinity(), + double work_unit_limit = std::numeric_limits::infinity()); + +template +void stop_fj_cpu_task(fj_cpu_task_t& task); + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu index a105497b7..12758b509 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu @@ -7,6 +7,10 @@ #include +#include +#include + +#include "cpu_fj_thread.cuh" #include "feasibility_jump.cuh" #include "feasibility_jump_impl_common.cuh" #include "fj_cpu.cuh" @@ -18,9 +22,12 @@ #include #include +#include #include +#include #include #include +#include #include #include #include @@ -41,6 +48,53 @@ namespace cuopt::linear_programming::detail { +template +static f_t clamp_value(f_t value, f_t lower, f_t upper) +{ + return std::min(std::max(value, lower), upper); +} + +template +static void rebuild_reverse_matrix(i_t n_variables, + i_t n_constraints, + i_t nnz, + const std::vector& coefficients, + const std::vector& variables, + const std::vector& offsets, + std::vector& reverse_coefficients, + std::vector& reverse_constraints, + std::vector& reverse_offsets) +{ + reverse_offsets.assign(n_variables + 1, 0); + for (i_t row = 0; row < n_constraints; ++row) { + for (i_t p = offsets[row]; p < offsets[row + 1]; ++p) { + ++reverse_offsets[variables[p] + 1]; + } + } + std::partial_sum(reverse_offsets.begin(), reverse_offsets.end(), reverse_offsets.begin()); + + reverse_constraints.resize(nnz); + reverse_coefficients.resize(nnz); + std::vector next = reverse_offsets; + for (i_t row = 0; row < n_constraints; ++row) { + for (i_t p = offsets[row]; p < offsets[row + 1]; ++p) { + const i_t col = variables[p]; + const i_t dst = next[col]++; + reverse_constraints[dst] = row; + reverse_coefficients[dst] = coefficients[p]; + } + } +} + +template +void finalize_fj_cpu_host_initialization( + fj_cpu_climber_t& fj_cpu, + i_t n_variables, + i_t n_constraints, + i_t n_integer_vars, + i_t nnz, + const typename mip_solver_settings_t::tolerances_t& tolerances); + template thrust::tuple get_mtm_for_bound(const typename fj_t::climber_data_t::view_t& fj, i_t var_idx, @@ -792,9 +846,8 @@ static void apply_move(fj_cpu_climber_t& fj_cpu, fj_cpu.h_incumbent_objective - fj_cpu.settings.parameters.breakthrough_move_epsilon; fj_cpu.h_best_assignment = fj_cpu.h_assignment; fj_cpu.iterations_since_best = 0; - CUOPT_LOG_TRACE("%sCPUFJ: new best objective: %g", - fj_cpu.log_prefix.c_str(), - fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_incumbent_objective)); + CUOPT_LOG_TRACE( + "%sCPUFJ: new best objective: %g", fj_cpu.log_prefix.c_str(), fj_cpu.h_incumbent_objective); if (fj_cpu.improvement_callback) { double current_work_units = fj_cpu.work_units_elapsed.load(std::memory_order_acquire); fj_cpu.improvement_callback( @@ -829,7 +882,6 @@ static thrust::tuple find_mtm_move( fj_cpu_climber_t& fj_cpu, const std::vector& target_cstrs, bool localmin = false) { CPUFJ_NVTX_RANGE("CPUFJ::find_mtm_move"); - auto& problem = *fj_cpu.pb_ptr; raft::random::PCGenerator rng(fj_cpu.settings.seed + fj_cpu.iterations, 0, 0); @@ -1258,33 +1310,29 @@ static void init_fj_cpu(fj_cpu_climber_t& fj_cpu, fj_cpu.h_tabu_lastinc.resize(fj_cpu.pb_ptr->n_variables, 0); fj_cpu.iterations = 0; - // set pointers to host copies - // technically not 'device_span's but raft doesn't have a universal span. - // cuda::std::span? - fj_cpu.view.cstr_left_weights = - raft::device_span(fj_cpu.h_cstr_left_weights.data(), fj_cpu.h_cstr_left_weights.size()); - fj_cpu.view.cstr_right_weights = - raft::device_span(fj_cpu.h_cstr_right_weights.data(), fj_cpu.h_cstr_right_weights.size()); - fj_cpu.view.objective_weight = &fj_cpu.h_objective_weight; - fj_cpu.view.incumbent_assignment = - raft::device_span(fj_cpu.h_assignment.data(), fj_cpu.h_assignment.size()); - fj_cpu.view.incumbent_lhs = raft::device_span(fj_cpu.h_lhs.data(), fj_cpu.h_lhs.size()); - fj_cpu.view.incumbent_lhs_sumcomp = - raft::device_span(fj_cpu.h_lhs_sumcomp.data(), fj_cpu.h_lhs_sumcomp.size()); - fj_cpu.view.tabu_nodec_until = - raft::device_span(fj_cpu.h_tabu_nodec_until.data(), fj_cpu.h_tabu_nodec_until.size()); - fj_cpu.view.tabu_noinc_until = - raft::device_span(fj_cpu.h_tabu_noinc_until.data(), fj_cpu.h_tabu_noinc_until.size()); - fj_cpu.view.tabu_lastdec = - raft::device_span(fj_cpu.h_tabu_lastdec.data(), fj_cpu.h_tabu_lastdec.size()); - fj_cpu.view.tabu_lastinc = - raft::device_span(fj_cpu.h_tabu_lastinc.data(), fj_cpu.h_tabu_lastinc.size()); - fj_cpu.view.objective_vars = - raft::device_span(fj_cpu.h_objective_vars.data(), fj_cpu.h_objective_vars.size()); - fj_cpu.view.incumbent_objective = &fj_cpu.h_incumbent_objective; - fj_cpu.view.best_objective = &fj_cpu.h_best_objective; + finalize_fj_cpu_host_initialization(fj_cpu, + problem.n_variables, + problem.n_constraints, + problem.n_integer_vars, + problem.nnz, + problem.tolerances); +} + +template +static void set_host_data_view( + fj_cpu_climber_t& fj_cpu, + i_t n_variables, + i_t n_constraints, + i_t n_integer_vars, + i_t nnz, + const typename mip_solver_settings_t::tolerances_t& tolerances) +{ + fj_cpu.view.pb.tolerances = tolerances; + fj_cpu.view.pb.n_variables = n_variables; + fj_cpu.view.pb.n_integer_vars = n_integer_vars; + fj_cpu.view.pb.n_constraints = n_constraints; + fj_cpu.view.pb.nnz = nnz; - fj_cpu.view.settings = &fj_cpu.settings; fj_cpu.view.pb.constraint_lower_bounds = raft::device_span(fj_cpu.h_cstr_lb.data(), fj_cpu.h_cstr_lb.size()); fj_cpu.view.pb.constraint_upper_bounds = @@ -1295,6 +1343,8 @@ static void init_fj_cpu(fj_cpu_climber_t& fj_cpu, raft::device_span(fj_cpu.h_var_types.data(), fj_cpu.h_var_types.size()); fj_cpu.view.pb.is_binary_variable = raft::device_span(fj_cpu.h_is_binary_variable.data(), fj_cpu.h_is_binary_variable.size()); + fj_cpu.view.pb.binary_indices = + raft::device_span(fj_cpu.h_binary_indices.data(), fj_cpu.h_binary_indices.size()); fj_cpu.view.pb.coefficients = raft::device_span(fj_cpu.h_coefficients.data(), fj_cpu.h_coefficients.size()); fj_cpu.view.pb.offsets = raft::device_span(fj_cpu.h_offsets.data(), fj_cpu.h_offsets.size()); @@ -1308,13 +1358,61 @@ static void init_fj_cpu(fj_cpu_climber_t& fj_cpu, raft::device_span(fj_cpu.h_reverse_offsets.data(), fj_cpu.h_reverse_offsets.size()); fj_cpu.view.pb.objective_coefficients = raft::device_span(fj_cpu.h_obj_coeffs.data(), fj_cpu.h_obj_coeffs.size()); - fj_cpu.h_objective_vars.resize(problem.n_variables); +} + +template +void finalize_fj_cpu_host_initialization( + fj_cpu_climber_t& fj_cpu, + i_t n_variables, + i_t n_constraints, + i_t n_integer_vars, + i_t nnz, + const typename mip_solver_settings_t::tolerances_t& tolerances) +{ + raft::common::nvtx::range scope("finalize_fj_cpu_host_initialization"); + + cuopt_assert(n_variables >= 0, "invalid variable count"); + cuopt_assert(n_constraints >= 0, "invalid constraint count"); + cuopt_assert(fj_cpu.h_offsets.size() == static_cast(n_constraints + 1), + "invalid CSR offsets"); + cuopt_assert(fj_cpu.h_reverse_offsets.size() == static_cast(n_variables + 1), + "invalid reverse offsets"); + cuopt_assert(fj_cpu.h_assignment.size() == static_cast(n_variables), + "seed assignment size mismatch"); + + set_host_data_view(fj_cpu, n_variables, n_constraints, n_integer_vars, nnz, tolerances); + + fj_cpu.view.cstr_left_weights = + raft::device_span(fj_cpu.h_cstr_left_weights.data(), fj_cpu.h_cstr_left_weights.size()); + fj_cpu.view.cstr_right_weights = + raft::device_span(fj_cpu.h_cstr_right_weights.data(), fj_cpu.h_cstr_right_weights.size()); + fj_cpu.view.objective_weight = &fj_cpu.h_objective_weight; + fj_cpu.view.incumbent_assignment = + raft::device_span(fj_cpu.h_assignment.data(), fj_cpu.h_assignment.size()); + fj_cpu.view.incumbent_lhs = raft::device_span(fj_cpu.h_lhs.data(), fj_cpu.h_lhs.size()); + fj_cpu.view.incumbent_lhs_sumcomp = + raft::device_span(fj_cpu.h_lhs_sumcomp.data(), fj_cpu.h_lhs_sumcomp.size()); + fj_cpu.view.tabu_nodec_until = + raft::device_span(fj_cpu.h_tabu_nodec_until.data(), fj_cpu.h_tabu_nodec_until.size()); + fj_cpu.view.tabu_noinc_until = + raft::device_span(fj_cpu.h_tabu_noinc_until.data(), fj_cpu.h_tabu_noinc_until.size()); + fj_cpu.view.tabu_lastdec = + raft::device_span(fj_cpu.h_tabu_lastdec.data(), fj_cpu.h_tabu_lastdec.size()); + fj_cpu.view.tabu_lastinc = + raft::device_span(fj_cpu.h_tabu_lastinc.data(), fj_cpu.h_tabu_lastinc.size()); + fj_cpu.view.incumbent_objective = &fj_cpu.h_incumbent_objective; + fj_cpu.view.best_objective = &fj_cpu.h_best_objective; + fj_cpu.view.settings = &fj_cpu.settings; + + fj_cpu.h_objective_vars.resize(n_variables); auto end = std::copy_if( thrust::counting_iterator(0), - thrust::counting_iterator(problem.n_variables), + thrust::counting_iterator(n_variables), fj_cpu.h_objective_vars.begin(), [&fj_cpu](i_t idx) { return !fj_cpu.view.pb.integer_equal(fj_cpu.h_obj_coeffs[idx], (f_t)0); }); fj_cpu.h_objective_vars.resize(end - fj_cpu.h_objective_vars.begin()); + fj_cpu.view.objective_vars = + raft::device_span(fj_cpu.h_objective_vars.data(), fj_cpu.h_objective_vars.size()); fj_cpu.h_best_objective = +std::numeric_limits::infinity(); @@ -1323,7 +1421,7 @@ static void init_fj_cpu(fj_cpu_climber_t& fj_cpu, std::make_pair(0, fj_staged_score_t::zero())); fj_cpu.cached_cstr_bounds.resize(fj_cpu.h_reverse_coefficients.size()); - for (i_t var_idx = 0; var_idx < (i_t)fj_cpu.view.pb.n_variables; ++var_idx) { + for (i_t var_idx = 0; var_idx < n_variables; ++var_idx) { auto [offset_begin, offset_end] = reverse_range_for_var(fj_cpu, var_idx); for (i_t i = offset_begin; i < offset_end; ++i) { fj_cpu.cached_cstr_bounds[i] = @@ -1332,9 +1430,9 @@ static void init_fj_cpu(fj_cpu_climber_t& fj_cpu, } } - fj_cpu.flip_move_computed.resize(fj_cpu.view.pb.n_variables, false); - fj_cpu.var_bitmap.resize(fj_cpu.view.pb.n_variables, false); - fj_cpu.iter_mtm_vars.reserve(fj_cpu.view.pb.n_variables); + fj_cpu.flip_move_computed.resize(n_variables, false); + fj_cpu.var_bitmap.resize(n_variables, false); + fj_cpu.iter_mtm_vars.reserve(n_variables); recompute_lhs(fj_cpu); @@ -1342,6 +1440,125 @@ static void init_fj_cpu(fj_cpu_climber_t& fj_cpu, precompute_problem_features(fj_cpu); } +template +static std::unique_ptr> init_fj_cpu_from_host_lp( + const dual_simplex::lp_problem_t& problem, + const std::vector& variable_types, + const std::vector& seed_assignment, + const dual_simplex::simplex_solver_settings_t& settings, + std::atomic& preemption_flag) +{ + using f_t2 = typename type_2::type; + + cuopt_assert(variable_types.size() >= static_cast(problem.num_cols), + "variable type size mismatch"); + + typename mip_solver_settings_t::tolerances_t tolerances{}; + tolerances.absolute_tolerance = settings.primal_tol; + tolerances.relative_tolerance = settings.zero_tol; + tolerances.integrality_tolerance = settings.integer_tol; + tolerances.absolute_mip_gap = settings.absolute_mip_gap_tol; + tolerances.relative_mip_gap = settings.relative_mip_gap_tol; + + const i_t n_variables = problem.num_cols; + const i_t n_constraints = problem.num_rows; + + dual_simplex::csr_matrix_t csr_A(problem.num_rows, problem.num_cols, problem.A.nnz()); + problem.A.to_compressed_row(csr_A); + std::vector coefficients = csr_A.x; + std::vector variables = csr_A.j; + std::vector offsets = csr_A.row_start; + std::vector constraint_lower_bounds = problem.rhs; + std::vector constraint_upper_bounds = problem.rhs; + std::vector variable_bounds(n_variables); + std::vector cpufj_variable_types(n_variables); + std::vector is_binary_variable(n_variables, 0); + i_t n_integer_vars = 0; + + for (i_t j = 0; j < n_variables; ++j) { + variable_bounds[j] = f_t2{problem.lower[j], problem.upper[j]}; + const auto var_type = variable_types[j]; + cpufj_variable_types[j] = + var_type == dual_simplex::variable_type_t::CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; + + const bool is_integer = cpufj_variable_types[j] == var_t::INTEGER; + const bool is_binary = is_integer && + integer_equal(problem.lower[j], f_t{0}, settings.integer_tol) && + integer_equal(problem.upper[j], f_t{1}, settings.integer_tol); + if (is_integer) { ++n_integer_vars; } + if (is_binary) { is_binary_variable[j] = 1; } + } + + const i_t nnz = static_cast(variables.size()); + std::vector reverse_coefficients; + std::vector reverse_constraints; + std::vector reverse_offsets; + rebuild_reverse_matrix(n_variables, + n_constraints, + nnz, + coefficients, + variables, + offsets, + reverse_coefficients, + reverse_constraints, + reverse_offsets); + + std::vector projected_seed(n_variables, f_t{0}); + for (i_t j = 0; j < n_variables; ++j) { + f_t value = j < static_cast(seed_assignment.size()) ? seed_assignment[j] : f_t{0}; + value = clamp_value(value, problem.lower[j], problem.upper[j]); + if (variable_types[j] != dual_simplex::variable_type_t::CONTINUOUS) { + value = clamp_value(std::round(value), problem.lower[j], problem.upper[j]); + } + projected_seed[j] = value; + } + + fj_settings_t fj_settings; + fj_settings.mode = fj_mode_t::EXIT_NON_IMPROVING; + fj_settings.n_of_minimums_for_exit = std::numeric_limits::max(); + fj_settings.time_limit = std::numeric_limits::infinity(); + fj_settings.iteration_limit = std::numeric_limits::max(); + fj_settings.update_weights = true; + fj_settings.feasibility_run = false; + fj_settings.seed = cuopt::seed_generator::get_seed(); + + auto fj_cpu = std::make_unique>(preemption_flag); + fj_cpu->view = typename fj_t::climber_data_t::view_t{}; + fj_cpu->pb_ptr = nullptr; + fj_cpu->settings = fj_settings; + + fj_cpu->h_reverse_coefficients = std::move(reverse_coefficients); + fj_cpu->h_reverse_constraints = std::move(reverse_constraints); + fj_cpu->h_reverse_offsets = std::move(reverse_offsets); + fj_cpu->h_coefficients = std::move(coefficients); + fj_cpu->h_offsets = std::move(offsets); + fj_cpu->h_variables = std::move(variables); + fj_cpu->h_obj_coeffs = problem.objective; + fj_cpu->h_var_bounds = std::move(variable_bounds); + fj_cpu->h_cstr_lb = std::move(constraint_lower_bounds); + fj_cpu->h_cstr_ub = std::move(constraint_upper_bounds); + fj_cpu->h_var_types = std::move(cpufj_variable_types); + fj_cpu->h_is_binary_variable = std::move(is_binary_variable); + + fj_cpu->h_cstr_left_weights.resize(n_constraints, 1.0); + fj_cpu->h_cstr_right_weights.resize(n_constraints, 1.0); + fj_cpu->max_weight = 1.0; + fj_cpu->h_objective_weight = 0.0; + fj_cpu->h_assignment = projected_seed; + fj_cpu->h_best_assignment = std::move(projected_seed); + fj_cpu->h_lhs.resize(n_constraints); + fj_cpu->h_lhs_sumcomp.resize(n_constraints, 0); + fj_cpu->h_tabu_nodec_until.resize(n_variables, 0); + fj_cpu->h_tabu_noinc_until.resize(n_variables, 0); + fj_cpu->h_tabu_lastdec.resize(n_variables, 0); + fj_cpu->h_tabu_lastinc.resize(n_variables, 0); + fj_cpu->iterations = 0; + + finalize_fj_cpu_host_initialization( + *fj_cpu, n_variables, n_constraints, n_integer_vars, nnz, tolerances); + return fj_cpu; +} + template static void sanity_checks(fj_cpu_climber_t& fj_cpu) { @@ -1417,7 +1634,9 @@ std::unique_ptr> fj_t::create_cpu_climber( } template -static bool cpufj_solve_loop(fj_cpu_climber_t& fj_cpu, f_t in_time_limit) +static bool cpufj_solve_loop(fj_cpu_climber_t& fj_cpu, + f_t in_time_limit, + double work_unit_limit = std::numeric_limits::infinity()) { i_t local_mins = 0; auto loop_start = std::chrono::high_resolution_clock::now(); @@ -1518,7 +1737,7 @@ static bool cpufj_solve_loop(fj_cpu_climber_t& fj_cpu, f_t in_time_lim fj_cpu.total_violations += fj_cpu.view.excess_score(cstr_idx, fj_cpu.h_lhs[cstr_idx]); } if (fj_cpu.iterations % fj_cpu.log_interval == 0) { - CUOPT_LOG_TRACE( + CUOPT_LOG_DEBUG( "%sCPUFJ iteration: %d/%d, local mins: %d, best_objective: %g, viol: %zu, obj weight %g, " "maxw %g", fj_cpu.log_prefix.c_str(), @@ -1527,7 +1746,7 @@ static bool cpufj_solve_loop(fj_cpu_climber_t& fj_cpu, f_t in_time_lim ? fj_cpu.settings.iteration_limit : -1, local_mins, - fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_best_objective), + fj_cpu.h_best_objective, fj_cpu.violated_constraints.size(), fj_cpu.h_objective_weight, fj_cpu.max_weight); @@ -1553,6 +1772,7 @@ static bool cpufj_solve_loop(fj_cpu_climber_t& fj_cpu, f_t in_time_lim fj_cpu.work_units_elapsed += biased_work; if (fj_cpu.producer_sync != nullptr) { fj_cpu.producer_sync->notify_progress(); } + if (fj_cpu.work_units_elapsed >= work_unit_limit) { break; } } cuopt_func_call(sanity_checks(fj_cpu)); @@ -1592,7 +1812,7 @@ cpu_fj_thread_t::~cpu_fj_thread_t() template void cpu_fj_thread_t::run_worker() { - cpu_fj_solution_found = cpufj_solve_loop(*fj_cpu, time_limit); + cpu_fj_solution_found = cpufj_solve_loop(*fj_cpu, time_limit, work_unit_limit); } template @@ -1633,24 +1853,105 @@ std::unique_ptr> init_fj_cpu_standalone( return fj_cpu; } +template +void fj_cpu_task_t::fj_cpu_deleter_t::operator()(fj_cpu_climber_t* ptr) const +{ + delete ptr; +} + +template +std::unique_ptr> make_fj_cpu_task_from_host_lp( + const dual_simplex::lp_problem_t& problem, + const std::vector& variable_types, + const std::vector& seed_assignment, + const dual_simplex::simplex_solver_settings_t& settings, + std::function&, double)> improvement_callback, + std::string log_prefix) +{ + auto task = std::make_unique>(); + auto fj_cpu = init_fj_cpu_from_host_lp( + problem, variable_types, seed_assignment, settings, task->preemption_flag); + fj_cpu->log_prefix = std::move(log_prefix); + fj_cpu->improvement_callback = std::move(improvement_callback); + task->fj_cpu.reset(fj_cpu.release()); + return task; +} + +template +void run_fj_cpu_task(fj_cpu_task_t& task, f_t time_limit, double work_unit_limit) +{ + cuopt_assert(task.fj_cpu != nullptr, "CPUFJ task has no climber"); + auto& fj_cpu = *task.fj_cpu; + fj_cpu.halted = false; + cpufj_solve_loop(fj_cpu, time_limit, work_unit_limit); +} + +template +void stop_fj_cpu_task(fj_cpu_task_t& task) +{ + if (task.fj_cpu) { + auto& fj_cpu = *task.fj_cpu; + fj_cpu.preemption_flag = true; + fj_cpu.halted = true; + } +} + #if MIP_INSTANTIATE_FLOAT template class fj_t; template class cpu_fj_thread_t; +template struct fj_cpu_task_t; template std::unique_ptr> init_fj_cpu_standalone( problem_t& problem, solution_t& solution, std::atomic& preemption_flag, fj_settings_t settings); +template std::unique_ptr> make_fj_cpu_task_from_host_lp( + const dual_simplex::lp_problem_t& problem, + const std::vector& variable_types, + const std::vector& seed_assignment, + const dual_simplex::simplex_solver_settings_t& settings, + std::function&, double)> improvement_callback, + std::string log_prefix); +template void run_fj_cpu_task(fj_cpu_task_t& task, + float time_limit, + double work_unit_limit); +template void stop_fj_cpu_task(fj_cpu_task_t& task); +template void finalize_fj_cpu_host_initialization( + fj_cpu_climber_t& fj_cpu, + int n_variables, + int n_constraints, + int n_integer_vars, + int nnz, + const typename mip_solver_settings_t::tolerances_t& tolerances); #endif #if MIP_INSTANTIATE_DOUBLE template class fj_t; template class cpu_fj_thread_t; +template struct fj_cpu_task_t; template std::unique_ptr> init_fj_cpu_standalone( problem_t& problem, solution_t& solution, std::atomic& preemption_flag, fj_settings_t settings); +template std::unique_ptr> make_fj_cpu_task_from_host_lp( + const dual_simplex::lp_problem_t& problem, + const std::vector& variable_types, + const std::vector& seed_assignment, + const dual_simplex::simplex_solver_settings_t& settings, + std::function&, double)> improvement_callback, + std::string log_prefix); +template void run_fj_cpu_task(fj_cpu_task_t& task, + double time_limit, + double work_unit_limit); +template void stop_fj_cpu_task(fj_cpu_task_t& task); +template void finalize_fj_cpu_host_initialization( + fj_cpu_climber_t& fj_cpu, + int n_variables, + int n_constraints, + int n_integer_vars, + int nnz, + const typename mip_solver_settings_t::tolerances_t& tolerances); #endif } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh index 3263609a2..cfac5121d 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh @@ -16,8 +16,8 @@ #include #include +#include #include -#include #include #include @@ -193,23 +193,6 @@ struct fj_cpu_climber_t { std::atomic& preemption_flag; }; -template -struct cpu_fj_thread_t : public cpu_worker_thread_base_t> { - ~cpu_fj_thread_t(); - - void run_worker(); - void on_terminate(); - void on_start(); - bool get_result() { return cpu_fj_solution_found; } - - void stop_cpu_solver(); - - std::atomic cpu_fj_solution_found{false}; - f_t time_limit{+std::numeric_limits::infinity()}; - std::unique_ptr> fj_cpu; - fj_t* fj_ptr{nullptr}; -}; - // Standalone CPUFJ init for running without full fj_t infrastructure (avoids GPU allocations). // Used for early CPUFJ during presolve. template diff --git a/cpp/src/mip_heuristics/local_search/local_search.cu b/cpp/src/mip_heuristics/local_search/local_search.cu index b96b48a41..135f88992 100644 --- a/cpp/src/mip_heuristics/local_search/local_search.cu +++ b/cpp/src/mip_heuristics/local_search/local_search.cu @@ -125,7 +125,7 @@ void local_search_t::start_cpufj_lptopt_scratch_threads( solution_lp, default_weights, default_weights, 0., context.preempt_heuristic_solver_); scratch_cpu_fj_on_lp_opt.fj_cpu->log_prefix = "******* scratch on LP optimal: "; scratch_cpu_fj_on_lp_opt.fj_cpu->improvement_callback = - [&population](f_t obj, const std::vector& h_vec, double /*work_units*/) { + [&population, this](f_t obj, const std::vector& h_vec, double /*work_units*/) { population.add_external_solution(h_vec, obj, solution_origin_t::CPUFJ); if (obj < local_search_best_obj) { CUOPT_LOG_DEBUG("******* New local search best obj %g, best overall %g",