Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 92 additions & 11 deletions cpp/src/branch_and_bound/branch_and_bound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <branch_and_bound/pseudo_costs.hpp>

#include <cuts/cuts.hpp>
#include <mip_heuristics/feasibility_jump/cpu_fj_thread.cuh>
#include <mip_heuristics/presolve/conflict_graph/clique_table.cuh>

#include <dual_simplex/basis_solves.hpp>
Expand All @@ -25,6 +26,7 @@

#include <raft/core/nvtx.hpp>
#include <utilities/hashing.hpp>
#include <utilities/scope_guard.hpp>

#include <omp.h>

Expand Down Expand Up @@ -2224,13 +2226,37 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
f_t last_objective = root_objective_;
f_t root_relax_objective = root_objective_;

constexpr bool enable_root_cut_cpufj = true;
std::unique_ptr<detail::fj_cpu_task_t<i_t, f_t>> root_cut_cpufj_task;
auto root_cut_cpufj_improvement_callback =
[this](f_t obj, const std::vector<f_t>& assignment, double) {
std::vector<f_t> user_assignment;
uncrush_primal_solution(original_problem_, original_lp_, assignment, user_assignment);
settings_.log.debug("Root cut CPUFJ found solution with objective %.16e\n", obj);
set_new_solution(user_assignment);
};
Comment on lines +2231 to +2237
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | 🏗️ Heavy lift

Uncrush against the task’s LP snapshot, not the live root LP.

assignment comes from the host-LP snapshot used to build the CPUFJ task, but this callback decodes it against original_lp_, which the concurrent cut pass is still mutating. That can remap cut/slack columns incorrectly or race with add_cuts/remove_cuts, so a valid CPUFJ incumbent can be corrupted before set_new_solution() sees it. Please either capture the exact LP snapshot used for task creation or make the CPUFJ callback emit user-space assignments directly.

As per coding guidelines, flag missing synchronization for shared state in concurrent code.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@cpp/src/branch_and_bound/branch_and_bound.cpp` around lines 2231 - 2237, The
callback root_cut_cpufj_improvement_callback is decoding the host-LP assignment
against the live original_lp_ (via uncrush_primal_solution) which can be
concurrently mutated by add_cuts/remove_cuts; capture and use the exact LP
snapshot used to build the CPUFJ task (or change the task to emit user-space
assignments directly) so uncrush_primal_solution runs against an immutable
snapshot, and add synchronization (or assert/guideline comment) around shared
state mutation to flag the missing concurrency protection for
original_problem_/original_lp_ before calling set_new_solution().

auto stop_root_cut_cpufj = [&]() {
if (!root_cut_cpufj_task) { return; }
detail::stop_fj_cpu_task(*root_cut_cpufj_task);
root_cut_cpufj_task.reset();
};
cuopt::scope_guard root_cut_cpufj_guard([&]() { stop_root_cut_cpufj(); });

enum class cut_pass_action_t { CONTINUE, BREAK, RETURN };
struct cut_pass_result_t {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if I understand the need/logic for this. Can't we just launch a new cpufj task after each cut pass and wait until the next pass finishes and launch another cpufj task while staying in cut pass loop ?

cut_pass_action_t action{cut_pass_action_t::CONTINUE};
mip_status_t status{mip_status_t::UNSET};
};

f_t cut_generation_start_time = tic();
i_t cut_pool_size = 0;
for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
if (num_fractional == 0) {
set_solution_at_root(solution, cut_info);
return mip_status_t::OPTIMAL;
Comment on lines 2254 to 2256
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Run the existing clique-thread cleanup on these new root-cut exits.

These return paths bypass finish_clique_thread(), unlike the earlier root-level exits in this function. That leaves the async clique task running past solve() on these paths.

Proposed cleanup
   for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
     if (num_fractional == 0) {
       set_solution_at_root(solution, cut_info);
+      finish_clique_thread();
       return mip_status_t::OPTIMAL;
     }
@@
-    if (cut_pass_result.action == cut_pass_action_t::RETURN) { return cut_pass_result.status; }
+    if (cut_pass_result.action == cut_pass_action_t::RETURN) {
+      finish_clique_thread();
+      return cut_pass_result.status;
+    }

Also applies to: 2540-2541

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@cpp/src/branch_and_bound/branch_and_bound.cpp` around lines 2254 - 2256,
These return paths call set_solution_at_root(...) and return immediately, which
bypasses the existing async clique cleanup; insert a call to
finish_clique_thread() immediately before each early return so the async clique
task is stopped and joined. Specifically, add finish_clique_thread() just prior
to the set_solution_at_root(solution, cut_info); return mip_status_t::OPTIMAL;
sequence shown, and make the same change at the other analogous exit that calls
set_solution_at_root and returns (the second location referenced in the
comment).

} else {
}

auto do_cut_pass = [&]() -> cut_pass_result_t {
#ifdef PRINT_FRACTIONAL_INFO
settings_.log.printf(
"Found %d fractional variables on cut pass %d\n", num_fractional, cut_pass);
Expand All @@ -2243,7 +2269,6 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
}
#endif

// Generate cuts and add them to the cut pool
f_t cut_start_time = tic();
bool problem_feasible = cut_generation.generate_cuts(original_lp_,
settings_,
Expand All @@ -2263,7 +2288,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
settings_.heuristic_preemption_callback();
}
finish_clique_thread();
return mip_status_t::INFEASIBLE;
return {cut_pass_action_t::RETURN, mip_status_t::INFEASIBLE};
}
f_t cut_generation_time = toc(cut_start_time);
if (cut_generation_time > 1.0) {
Expand All @@ -2279,7 +2304,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
std::vector<f_t> cut_rhs;
std::vector<cut_type_t> cut_types;
i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types);
if (num_cuts == 0) { break; }
if (num_cuts == 0) { return {cut_pass_action_t::BREAK, mip_status_t::UNSET}; }
cut_info.record_cut_types(cut_types);
#ifdef PRINT_CUT_POOL_TYPES
cut_pool.print_cutpool_types();
Expand All @@ -2293,7 +2318,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
for (i_t i = 0; i < static_cast<i_t>(cut_types.size()); ++i) {
settings_.log.printf("row %d cut type %d\n", i, cut_types[i]);
}
return mip_status_t::NUMERICAL;
return {cut_pass_action_t::RETURN, mip_status_t::NUMERICAL};
}
#endif
// Check against saved solution
Expand Down Expand Up @@ -2333,7 +2358,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
}
if (add_cuts_status != 0) {
settings_.log.printf("Failed to add cuts\n");
return mip_status_t::NUMERICAL;
return {cut_pass_action_t::RETURN, mip_status_t::NUMERICAL};
}

if (settings_.reduced_cost_strengthening >= 1 && upper_bound_.load() < last_upper_bound) {
Expand Down Expand Up @@ -2377,7 +2402,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
#ifdef WRITE_BOUND_STRENGTHENING_INFEASIBLE_MPS
original_lp_.write_mps("bound_strengthening_infeasible.mps");
#endif
return mip_status_t::INFEASIBLE;
return {cut_pass_action_t::RETURN, mip_status_t::INFEASIBLE};
}

i_t iter = 0;
Expand Down Expand Up @@ -2405,7 +2430,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
if (cut_status == dual::status_t::TIME_LIMIT) {
solver_status_ = mip_status_t::TIME_LIMIT;
set_final_solution(solution, root_objective_);
return solver_status_;
return {cut_pass_action_t::RETURN, solver_status_};
}

if (cut_status != dual::status_t::OPTIMAL) {
Expand All @@ -2430,7 +2455,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
#ifdef WRITE_CUT_INFEASIBLE_MPS
original_lp_.write_mps("cut_infeasible.mps");
#endif
return mip_status_t::NUMERICAL;
return {cut_pass_action_t::RETURN, mip_status_t::NUMERICAL};
}
}
root_objective_ = compute_objective(original_lp_, root_relax_soln_.x);
Expand Down Expand Up @@ -2475,7 +2500,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
if (rel_gap < settings_.relative_mip_gap_tol || abs_gap < settings_.absolute_mip_gap_tol) {
if (num_fractional == 0) { set_solution_at_root(solution, cut_info); }
set_final_solution(solution, root_objective_);
return mip_status_t::OPTIMAL;
return {cut_pass_action_t::RETURN, mip_status_t::OPTIMAL};
}

f_t change_in_objective = root_objective_ - last_objective;
Expand All @@ -2487,9 +2512,47 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
"Change in objective %.16e is less than 1e-3 of root relax objective %.16e\n",
change_in_objective,
root_relax_objective);
break;
return {cut_pass_action_t::BREAK, mip_status_t::UNSET};
}
last_objective = root_objective_;
return {cut_pass_action_t::CONTINUE, mip_status_t::UNSET};
};

cut_pass_result_t cut_pass_result;
#pragma omp parallel num_threads(root_cut_cpufj_task ? 2 : 1)
{
#pragma omp single
{
if (root_cut_cpufj_task) {
#pragma omp task shared(root_cut_cpufj_task) default(none)
detail::run_fj_cpu_task(*root_cut_cpufj_task,
std::numeric_limits<f_t>::infinity(),
std::numeric_limits<f_t>::infinity());
}

cut_pass_result = do_cut_pass();

if (root_cut_cpufj_task) { detail::stop_fj_cpu_task(*root_cut_cpufj_task); }
#pragma omp taskwait
}
}

if (cut_pass_result.action == cut_pass_action_t::RETURN) { return cut_pass_result.status; }
if (cut_pass_result.action == cut_pass_action_t::BREAK) { break; }

if (enable_root_cut_cpufj && !settings_.deterministic && settings_.num_threads >= 2 &&
cut_pass + 1 < settings_.max_cut_passes) {
f_t root_cut_cpufj_build_start_time = tic();
root_cut_cpufj_task =
detail::make_fj_cpu_task_from_host_lp<i_t, f_t>(original_lp_,
var_types_,
root_relax_soln_.x,
settings_,
root_cut_cpufj_improvement_callback,
"[RootCut CPUFJ] ");
settings_.log.debug("Root cut CPUFJ problem build time after pass %d: %.6f seconds\n",
cut_pass,
toc(root_cut_cpufj_build_start_time));
}
}

Expand All @@ -2504,6 +2567,24 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
original_lp_.A.col_start[original_lp_.A.n]);
}

if (enable_root_cut_cpufj && cut_info.has_cuts()) {
f_t root_cut_cpufj_build_start_time = tic();
root_cut_cpufj_task =
detail::make_fj_cpu_task_from_host_lp<i_t, f_t>(original_lp_,
var_types_,
root_relax_soln_.x,
settings_,
root_cut_cpufj_improvement_callback,
"[RootCut CPUFJ] ");
settings_.log.debug("Root cut CPUFJ final problem build time: %.6f seconds\n",
toc(root_cut_cpufj_build_start_time));
f_t fj_time_limit = settings_.deterministic
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does undetermistic path get 1s?

? f_t(settings_.time_limit - toc(exploration_stats_.start_time))
: f_t{1};
detail::run_fj_cpu_task(*root_cut_cpufj_task, fj_time_limit, 0.5);
root_cut_cpufj_task.reset();
}

set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_);

pc_.resize(original_lp_.num_cols);
Expand Down
73 changes: 73 additions & 0 deletions cpp/src/mip_heuristics/feasibility_jump/cpu_fj_thread.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */

#pragma once

#include <dual_simplex/presolve.hpp>
#include <dual_simplex/simplex_solver_settings.hpp>
#include <mip_heuristics/utilities/cpu_worker_thread.cuh>

#include <atomic>
#include <functional>
#include <limits>
#include <memory>
#include <string>
#include <vector>

namespace cuopt::linear_programming::detail {

template <typename i_t, typename f_t>
struct fj_cpu_climber_t;

template <typename i_t, typename f_t>
class fj_t;

template <typename i_t, typename f_t>
struct cpu_fj_thread_t : public cpu_worker_thread_base_t<cpu_fj_thread_t<i_t, f_t>> {
~cpu_fj_thread_t();

void run_worker();
void on_terminate();
void on_start();
bool get_result() { return cpu_fj_solution_found; }

void stop_cpu_solver();

std::atomic<bool> cpu_fj_solution_found{false};
f_t time_limit{+std::numeric_limits<f_t>::infinity()};
double work_unit_limit{std::numeric_limits<double>::infinity()};
std::unique_ptr<fj_cpu_climber_t<i_t, f_t>> fj_cpu;
fj_t<i_t, f_t>* fj_ptr{nullptr};
};

template <typename i_t, typename f_t>
struct fj_cpu_task_t {
struct fj_cpu_deleter_t {
void operator()(fj_cpu_climber_t<i_t, f_t>* ptr) const;
};
std::atomic<bool> preemption_flag{false};
std::unique_ptr<fj_cpu_climber_t<i_t, f_t>, fj_cpu_deleter_t> fj_cpu;
};

template <typename i_t, typename f_t>
std::unique_ptr<fj_cpu_task_t<i_t, f_t>> make_fj_cpu_task_from_host_lp(
const dual_simplex::lp_problem_t<i_t, f_t>& problem,
const std::vector<dual_simplex::variable_type_t>& variable_types,
const std::vector<f_t>& seed_assignment,
const dual_simplex::simplex_solver_settings_t<i_t, f_t>& settings,
std::function<void(f_t, const std::vector<f_t>&, double)> improvement_callback,
std::string log_prefix);

template <typename i_t, typename f_t>
void run_fj_cpu_task(fj_cpu_task_t<i_t, f_t>& task,
f_t time_limit = std::numeric_limits<f_t>::infinity(),
double work_unit_limit = std::numeric_limits<double>::infinity());

template <typename i_t, typename f_t>
void stop_fj_cpu_task(fj_cpu_task_t<i_t, f_t>& task);

} // namespace cuopt::linear_programming::detail
Loading
Loading