diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index edac39bf18..478ce94ff1 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -292,6 +292,42 @@ void run_single_file_mp(std::string file_path, exit(sol_found); } +int bind_process_to_cpu_partition(int gpu_id, int n_gpus) +{ + cpu_set_t parent_mask; + CPU_ZERO(&parent_mask); + + if (sched_getaffinity(0, sizeof(parent_mask), &parent_mask) != 0) { + perror("sched_getaffinity"); + return -1; + } + + std::vector visible_cpus; + for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) { + if (CPU_ISSET(cpu, &parent_mask)) { visible_cpus.push_back(cpu); } + } + + int cpus_per_gpu = std::max(1, static_cast(visible_cpus.size()) / n_gpus); + int start = gpu_id * cpus_per_gpu; + int end = std::min(start + cpus_per_gpu, static_cast(visible_cpus.size())); + + if (start >= end) { return -1; } + + cpu_set_t child_mask; + CPU_ZERO(&child_mask); + + for (int i = start; i < end; ++i) { + CPU_SET(visible_cpus[i], &child_mask); + std::cout << "Binding process to CPU " << visible_cpus[i] << std::endl; + } + + if (sched_setaffinity(0, sizeof(child_mask), &child_mask) != 0) { + perror("sched_setaffinity"); + return -1; + } + return end - start; +} + void return_gpu_to_the_queue(std::unordered_map& pid_gpu_map, std::unordered_map& pid_file_map, std::queue& gpu_queue) @@ -416,17 +452,6 @@ int main(int argc, char* argv[]) int reliability_branching = program.get("--reliability-branching"); bool deterministic = program.get("--determinism"); - if (num_cpu_threads < 0) { - num_cpu_threads = omp_get_max_threads() / n_gpus; - // std::ifstream smt_file("/sys/devices/system/cpu/smt/active"); - // if (smt_file.is_open()) { - // int smt_active = 0; - // smt_file >> smt_active; - // if (smt_active) { num_cpu_threads /= 2; } - // } - num_cpu_threads = std::max(num_cpu_threads, 1); - } - if (program.is_used("--out-dir")) { out_dir = program.get("--out-dir"); result_file = out_dir + "/final_result.csv"; @@ -501,6 +526,9 @@ int main(int argc, char* argv[]) } if (sys_pid == 0) { RAFT_CUDA_TRY(cudaSetDevice(gpu_id)); + int assigned_cpus = bind_process_to_cpu_partition(gpu_id, n_gpus); + omp_set_num_threads(assigned_cpus); + num_cpu_threads = assigned_cpus; run_single_file_mp(file_name, gpu_id, batch_num, @@ -533,6 +561,22 @@ int main(int argc, char* argv[]) merge_result_files(out_dir, result_file, n_gpus, batch_num); } else { auto memory_resource = make_async(); + auto run_single = [&]() { + run_single_file(path, + 0, + 0, + n_gpus, + out_dir, + initial_solution_file, + heuristics_only, + num_cpu_threads, + write_log_file, + log_to_console, + reliability_branching, + time_limit, + work_limit, + deterministic); + }; if (memory_limit > 0) { auto limiting_adaptor = rmm::mr::limiting_resource_adaptor(memory_resource, memory_limit * 1024ULL * 1024ULL); @@ -544,20 +588,7 @@ int main(int argc, char* argv[]) } else { rmm::mr::set_current_device_resource(memory_resource); } - run_single_file(path, - 0, - 0, - n_gpus, - out_dir, - initial_solution_file, - heuristics_only, - num_cpu_threads, - write_log_file, - log_to_console, - reliability_branching, - time_limit, - work_limit, - deterministic); + run_single(); } return 0; diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index e69ff7b9a5..d5c2d2b8f9 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -299,6 +300,7 @@ branch_and_bound_t::branch_and_bound_t( template f_t branch_and_bound_t::get_lower_bound() { + const f_t upper_bound = upper_bound_.load(); f_t lower_bound = lower_bound_ceiling_.load(); f_t heap_lower_bound = node_queue_.get_lower_bound(); f_t worker_lower_bound = worker_pool_.get_lower_bound(); @@ -306,9 +308,9 @@ f_t branch_and_bound_t::get_lower_bound() lower_bound = std::min(worker_lower_bound, lower_bound); if (std::isfinite(lower_bound)) { - return lower_bound; + return std::min(lower_bound, upper_bound); } else if (std::isfinite(root_objective_)) { - return root_objective_; + return std::min(root_objective_, upper_bound); } else { return -inf; } @@ -2075,7 +2077,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut cuopt::timer_t timer(std::numeric_limits::infinity()); std::shared_ptr> table; detail::find_initial_cliques( - problem_copy, tolerances_for_clique, &table, timer, false, signal_ptr); + problem_copy, tolerances_for_clique, &table, timer, signal_ptr); + if (clique_publish_callback_) { clique_publish_callback_(table); } return table; }); } diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index f2917ba930..633e848315 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -120,6 +120,13 @@ class branch_and_bound_t { void set_concurrent_lp_root_solve(bool enable) { enable_concurrent_lp_root_solve_ = enable; } + using clique_publish_callback_t = + std::function>)>; + void set_clique_publish_callback(clique_publish_callback_t cb) + { + clique_publish_callback_ = std::move(cb); + } + // Seed the global upper bound from an external source (e.g., early FJ during presolve). // `bound` must be in B&B's internal objective space. void set_initial_upper_bound(f_t bound); @@ -164,6 +171,7 @@ class branch_and_bound_t { std::shared_ptr> clique_table_; std::future>> clique_table_future_; std::atomic signal_extend_cliques_{false}; + clique_publish_callback_t clique_publish_callback_; work_limit_context_t work_unit_context_{"B&B"}; diff --git a/cpp/src/cuts/cuts.cpp b/cpp/src/cuts/cuts.cpp index 6d7d97ef0a..698c187564 100644 --- a/cpp/src/cuts/cuts.cpp +++ b/cpp/src/cuts/cuts.cpp @@ -80,9 +80,9 @@ clique_cut_build_status_t build_clique_cut(const std::vector& clique_vertic cut.i.clear(); cut.x.clear(); - i_t num_complements = 0; std::unordered_set seen_original; std::unordered_set seen_complement; + std::unordered_set complement_pairs; // vars that appear as both literal and complement seen_original.reserve(clique_vertices.size()); seen_complement.reserve(clique_vertices.size()); for (const auto vertex_idx : clique_vertices) { @@ -97,55 +97,93 @@ clique_cut_build_status_t build_clique_cut(const std::vector& clique_vertic cuopt_assert(lower_bound >= -bound_tol, "Clique variable lower bound below zero"); cuopt_assert(upper_bound <= 1 + bound_tol, "Clique variable upper bound above one"); - // we store the cut in the form of >= 1, for easy violation check with dot product - // that's why compelements have 1 as coeff and normal vars have -1 if (complement) { - if (seen_original.count(var_idx) > 0) { - // FIXME: this is temporary, fix all the vars of all other vars in the clique - return clique_cut_build_status_t::NO_CUT; - CLIQUE_CUTS_DEBUG("build_clique_cut infeasible var=%lld appears as variable and complement", - static_cast(var_idx)); - return clique_cut_build_status_t::INFEASIBLE; - } cuopt_assert(seen_complement.count(var_idx) == 0, "Duplicate complement in clique"); + if (seen_original.count(var_idx) > 0) { complement_pairs.insert(var_idx); } seen_complement.insert(var_idx); - num_complements++; - cut.i.push_back(var_idx); - cut.x.push_back(1.0); } else { - if (seen_complement.count(var_idx) > 0) { - // FIXME: this is temporary, fix all the vars of all other vars in the clique - return clique_cut_build_status_t::NO_CUT; - CLIQUE_CUTS_DEBUG("build_clique_cut infeasible var=%lld appears as variable and complement", - static_cast(var_idx)); - return clique_cut_build_status_t::INFEASIBLE; - } cuopt_assert(seen_original.count(var_idx) == 0, "Duplicate variable in clique"); + if (seen_complement.count(var_idx) > 0) { complement_pairs.insert(var_idx); } seen_original.insert(var_idx); - cut.i.push_back(var_idx); - cut.x.push_back(-1.0); } } + // ≥ 2 complement pairs force two distinct variables each into {0} ∩ {1} ⇒ LP infeasible. + if (complement_pairs.size() >= 2) { + CLIQUE_CUTS_DEBUG("build_clique_cut infeasible: %lld complement-pairs", + static_cast(complement_pairs.size())); +#if DEBUG_CLIQUE_CUTS + std::fprintf(stderr, "[DEBUG_CLIQUE_CUTS] infeasible clique vertices:"); + for (const auto v : clique_vertices) { + std::fprintf( + stderr, " %lld%s", static_cast(v % num_vars), (v >= num_vars ? "'" : "")); + } + std::fprintf(stderr, "\n[DEBUG_CLIQUE_CUTS] paired vars:"); + for (const auto v : complement_pairs) { + std::fprintf(stderr, " %lld", static_cast(v)); + } + std::fprintf(stderr, "\n"); +#endif + return clique_cut_build_status_t::INFEASIBLE; + } + + // With one complement pair (x + (1-x) = 1), the inequality forces every + // other clique member to 0. Stored in >=-form for easy dot-product check. + const bool has_pair = complement_pairs.size() == 1; + i_t num_complements = 0; + for (const auto vertex_idx : clique_vertices) { + const i_t var_idx = vertex_idx % num_vars; + const bool complement = vertex_idx >= num_vars; + if (has_pair && complement_pairs.count(var_idx) > 0) { continue; } + cut.i.push_back(var_idx); + cut.x.push_back(complement ? 1.0 : -1.0); + if (complement) { num_complements++; } + } + if (cut.i.empty()) { CLIQUE_CUTS_DEBUG("build_clique_cut no_cut empty support"); return clique_cut_build_status_t::NO_CUT; } - cut_rhs = static_cast(num_complements - 1); + cut_rhs = has_pair ? static_cast(num_complements) : static_cast(num_complements - 1); cut.sort(); const f_t dot = cut.dot(xstar); const f_t violation = cut_rhs - dot; if (violation > min_violation) { CLIQUE_CUTS_DEBUG( - "build_clique_cut accepted nz=%lld rhs=%g dot=%g violation=%g threshold=%g complements=%lld", + "build_clique_cut accepted has_pair=%d nz=%lld rhs=%g dot=%g violation=%g threshold=%g " + "complements=%lld", + has_pair ? 1 : 0, static_cast(cut.i.size()), static_cast(cut_rhs), static_cast(dot), static_cast(violation), static_cast(min_violation), static_cast(num_complements)); +#if DEBUG_CLIQUE_CUTS + std::fprintf(stderr, "[DEBUG_CLIQUE_CUTS] cut (clique literals:"); + for (const auto v : clique_vertices) { + std::fprintf( + stderr, " %lld%s", static_cast(v % num_vars), (v >= num_vars ? "'" : "")); + } + std::fprintf(stderr, "): "); + for (size_t k = 0; k < cut.i.size(); ++k) { + std::fprintf(stderr, + "%s%g*x[%lld] ", + (cut.x[k] >= 0 ? "+" : ""), + static_cast(cut.x[k]), + static_cast(cut.i[k])); + } + std::fprintf(stderr, ">= %g (x*[k]: ", static_cast(cut_rhs)); + for (size_t k = 0; k < cut.i.size(); ++k) { + std::fprintf(stderr, + "%lld->%g ", + static_cast(cut.i[k]), + static_cast(xstar[cut.i[k]])); + } + std::fprintf(stderr, ")\n"); +#endif return clique_cut_build_status_t::CUT_ADDED; } CLIQUE_CUTS_DEBUG( @@ -356,6 +394,11 @@ void extend_clique_vertices(std::vector& clique_vertices, static_cast(clique_vertices.size())); const f_t initial_clique_size = static_cast(clique_vertices.size()); + const f_t addtl_cliques_scan_cost = static_cast(graph.var_clique_addtl.avg_slice_size()); + if (add_work_estimate( + addtl_cliques_scan_cost * initial_clique_size, work_estimate, max_work_estimate)) { + return; + } i_t smallest_degree = std::numeric_limits::max(); i_t smallest_degree_var = -1; for (auto v : clique_vertices) { @@ -367,6 +410,7 @@ void extend_clique_vertices(std::vector& clique_vertices, } } + if (add_work_estimate(addtl_cliques_scan_cost, work_estimate, max_work_estimate)) { return; } auto adj_set = graph.get_adj_set_of_var(smallest_degree_var); std::unordered_set clique_members(clique_vertices.begin(), clique_vertices.end()); std::vector candidates; @@ -1813,6 +1857,16 @@ bool cut_generation_t::generate_cuts(const lp_problem_t& lp, } } + // Generate implied bound cuts + if (settings.implied_bound_cuts != 0) { + f_t cut_start_time = tic(); + generate_implied_bound_cuts(lp, settings, var_types, xstar, start_time); + f_t cut_generation_time = toc(cut_start_time); + if (cut_generation_time > 1.0) { + settings.log.debug("Implied bounds cut generation time %.2f seconds\n", cut_generation_time); + } + } + // Generate Clique cuts (last to give background clique table generation maximum time) if (settings.clique_cuts != 0) { f_t cut_start_time = tic(); @@ -1826,16 +1880,6 @@ bool cut_generation_t::generate_cuts(const lp_problem_t& lp, settings.log.debug("Clique cut generation time %.2f seconds\n", cut_generation_time); } } - - // Generate implied bound cuts - if (settings.implied_bound_cuts != 0) { - f_t cut_start_time = tic(); - generate_implied_bound_cuts(lp, settings, var_types, xstar, start_time); - f_t cut_generation_time = toc(cut_start_time); - if (cut_generation_time > 1.0) { - settings.log.debug("Implied bounds cut generation time %.2f seconds\n", cut_generation_time); - } - } return true; } @@ -1895,14 +1939,14 @@ bool cut_generation_t::generate_clique_cuts( CLIQUE_CUTS_DEBUG("generate_clique_cuts no clique table available, skipping"); return true; } - CLIQUE_CUTS_DEBUG("generate_clique_cuts using clique table first=%lld addtl=%lld", - static_cast(clique_table_->first.size()), - static_cast(clique_table_->addtl_cliques.size())); + CLIQUE_CUTS_DEBUG( + "generate_clique_cuts using clique table first=%lld addtl=%lld small_adj_edges=%lld", + static_cast(clique_table_->first.size()), + static_cast(clique_table_->addtl_cliques.size()), + static_cast(clique_table_->small_clique_adj.indices.size())); - if (clique_table_->first.empty() && clique_table_->addtl_cliques.empty()) { - CLIQUE_CUTS_DEBUG("generate_clique_cuts empty clique table, nothing to separate"); - return true; - } + // No early-exit on empty first/addtl: remove_small_cliques moves + // sub-threshold edges into small_clique_adj, which BK still sees. cuopt_assert(clique_table_->n_variables == num_vars, "Clique table variable count mismatch"); cuopt_assert(static_cast(num_vars) <= xstar.size(), "Clique cut xstar size mismatch"); @@ -1910,7 +1954,8 @@ bool cut_generation_t::generate_clique_cuts( const f_t min_violation = std::max(settings.primal_tol, static_cast(1e-6)); const f_t bound_tol = settings.primal_tol; const f_t min_weight = 1.0 + min_violation; - // TODO this can be problem dependent + // TODO: make problem-dependent. max_work_estimate=1e8 aligns with max_calls; + // time is ultimately bounded by settings.time_limit. const i_t max_calls = 100000; f_t work_estimate = 0.0; const f_t max_work_estimate = 1e8; @@ -1959,15 +2004,21 @@ bool cut_generation_t::generate_clique_cuts( work_estimate += 3.0 * static_cast(vertices.size()); if (work_estimate > max_work_estimate) { return true; } + const f_t addtl_cliques_scan_cost = + static_cast(clique_table_->var_clique_addtl.avg_slice_size()); std::vector> adj_local(vertices.size()); size_t total_adj_entries = 0; size_t kept_adj_entries = 0; for (size_t idx = 0; idx < vertices.size(); ++idx) { if (toc(start_time) >= settings.time_limit) { return true; } + work_estimate += 1.0 + addtl_cliques_scan_cost; + if (work_estimate > max_work_estimate) { return true; } i_t vertex_idx = vertices[idx]; // returns the complement as well auto adj_set = clique_table_->get_adj_set_of_var(vertex_idx); total_adj_entries += adj_set.size(); + work_estimate += static_cast(adj_set.size()); + if (work_estimate > max_work_estimate) { return true; } auto& adj = adj_local[idx]; adj.reserve(adj_set.size()); for (const auto neighbor : adj_set) { @@ -1979,23 +2030,19 @@ bool cut_generation_t::generate_clique_cuts( adj.push_back(local_neighbor); } kept_adj_entries += adj.size(); + work_estimate += 2.0 * static_cast(adj.size()); #ifdef ASSERT_MODE { + // {k, ¬k} as neighbors is legal (literal is implicitly fixed); only guard duplicates. std::unordered_set adj_global; adj_global.reserve(adj.size()); for (const auto neighbor : adj) { i_t v = vertices[neighbor]; cuopt_assert(adj_global.insert(v).second, "Duplicate neighbor in adjacency list"); - i_t complement = (v >= num_vars) ? (v - num_vars) : (v + num_vars); - cuopt_assert(adj_global.find(complement) == adj_global.end(), - "Adjacency list contains complementing variable"); } } #endif } - work_estimate += static_cast(vertices.size()) + static_cast(total_adj_entries) + - 2.0 * static_cast(kept_adj_entries); - if (work_estimate > max_work_estimate) { return true; } CLIQUE_CUTS_DEBUG("generate_clique_cuts adjacency raw_entries=%lld kept_entries=%lld", static_cast(total_adj_entries), static_cast(kept_adj_entries)); diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index 13649682a6..cc498c8276 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -36,6 +36,7 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/local_search/line_segment_search/line_segment_search.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/bounds_presolve.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/bounds_update_data.cu + ${CMAKE_CURRENT_SOURCE_DIR}/presolve/clique_activity_corrections.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conditional_bound_strengthening.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/multi_probe.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/probing_cache.cu diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index b8dc3d33bf..c2f81c0fb4 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -10,7 +10,6 @@ #include -#include #include #include #include @@ -252,32 +251,11 @@ bool diversity_manager_t::run_presolve(f_t time_limit, timer_t global_ problem_ptr->related_vars_time_limit = context.settings.heuristic_params.related_vars_time_limit; if (!global_timer.check_time_limit()) { trivial_presolve(*problem_ptr, remap_cache_ids); } if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } - // if (!presolve_timer.check_time_limit() && !context.settings.heuristics_only && - // !problem_ptr->empty) { - // f_t time_limit_for_clique_table = std::min(3., presolve_timer.remaining_time() / 5); - // timer_t clique_timer(time_limit_for_clique_table); - // dual_simplex::user_problem_t host_problem(problem_ptr->handle_ptr); - // problem_ptr->get_host_user_problem(host_problem); - // std::shared_ptr> clique_table; - // constexpr bool modify_problem_with_cliques = false; - // find_initial_cliques(host_problem, - // context.settings.tolerances, - // &clique_table, - // clique_timer, - // modify_problem_with_cliques, - // nullptr); - // if (modify_problem_with_cliques) { - // problem_ptr->set_constraints_from_host_user_problem(host_problem); - // cuopt_assert(host_problem.lower.size() == static_cast(problem_ptr->n_variables), - // "host lower bound size mismatch"); - // cuopt_assert(host_problem.upper.size() == static_cast(problem_ptr->n_variables), - // "host upper bound size mismatch"); - // std::vector all_var_indices(problem_ptr->n_variables); - // std::iota(all_var_indices.begin(), all_var_indices.end(), 0); - // problem_ptr->update_variable_bounds(all_var_indices, host_problem.lower, - // host_problem.upper); trivial_presolve(*problem_ptr, remap_cache_ids); - // } - // } + // The initial clique table is built asynchronously by B&B + // (find_initial_cliques) and published into problem_ptr->clique_table. + // Heuristics pick it up via the atomic snapshot accessor. We intentionally + // do not build a duplicate table here — it added 1-3 s to the critical path + // of presolve without materially changing what B&B produces. // May overconstrain if Papilo presolve has been run before if (context.settings.presolver == presolver_t::None) { if (!problem_ptr->empty) { diff --git a/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cu b/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cu index 6512ad05da..8a451f80cc 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cu +++ b/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cu @@ -400,7 +400,7 @@ bool bounds_repair_t::repair_problem(problem_t& problem, i_t curr_cstr = get_random_cstr(); // best way would be to check a variable cycle, but this is easier and more performant bool is_cycle = detect_cycle(curr_cstr); - if (is_cycle) { CUOPT_LOG_DEBUG("Repair: cycle detected at cstr %d", curr_cstr); } + if (is_cycle) { CUOPT_LOG_TRACE("Repair: cycle detected at cstr %d", curr_cstr); } // in parallel compute the best shift and best respective damage i_t n_candidates = compute_best_shift(problem, original_problem, curr_cstr); // if no candidate is there continue with another constraint diff --git a/cpp/src/mip_heuristics/presolve/bounds_presolve.cu b/cpp/src/mip_heuristics/presolve/bounds_presolve.cu index 0a7c9de41a..8c8e82fa0d 100644 --- a/cpp/src/mip_heuristics/presolve/bounds_presolve.cu +++ b/cpp/src/mip_heuristics/presolve/bounds_presolve.cu @@ -78,7 +78,10 @@ struct detect_infeas_redun_t { template bound_presolve_t::bound_presolve_t(mip_solver_context_t& context_, settings_t in_settings) - : context(context_), upd(*context.problem_ptr), settings(in_settings) + : context(context_), + upd(*context.problem_ptr), + clique_data(context.problem_ptr->handle_ptr->get_stream()), + settings(in_settings) { } @@ -88,6 +91,38 @@ void bound_presolve_t::resize(problem_t& problem) upd.resize(problem); host_lb.resize(problem.n_variables); host_ub.resize(problem.n_variables); + // Clique-group cache invalidation is handled by ensure_clique_data's fingerprint. +} + +template +void bound_presolve_t::ensure_clique_data(problem_t& pb) +{ + // Snapshot avoids racing with B&B's async clique-table publication and + // keeps the table alive for the duration of this call. + auto ct_snapshot = pb.get_clique_table_snapshot(); + auto* current_ct = ct_snapshot.get(); + const bool cache_hit = clique_data_built // + && last_built_problem == &pb // + && last_built_clique_table == current_ct // + && last_built_n_variables == pb.n_variables // + && last_built_n_constraints == pb.n_constraints // + && last_built_nnz == pb.nnz; + if (cache_hit) return; + + if (!current_ct) { + // Not attached yet; keep fingerprint mismatched so the next call rechecks. + clique_data.n_groups = 0; + clique_data_built = false; + return; + } + clique_data.build_from_host(pb, context.problem_ptr->reverse_original_ids, *current_ct); + upd.resize_clique_buffers(clique_data.n_groups, pb.handle_ptr->get_stream()); + last_built_problem = &pb; + last_built_clique_table = current_ct; + last_built_n_variables = pb.n_variables; + last_built_n_constraints = pb.n_constraints; + last_built_nnz = pb.nnz; + clique_data_built = true; } template @@ -119,10 +154,49 @@ bool bound_presolve_t::calculate_bounds_update(problem_t& pb constexpr auto n_threads = 256; upd.bounds_changed.set_value_async(zero, pb.handle_ptr->get_stream()); - update_bounds_kernel - <<get_stream()>>>(pb.view(), upd.view()); - RAFT_CHECK_CUDA(pb.handle_ptr->get_stream()); - i_t h_bounds_changed = upd.bounds_changed.value(pb.handle_ptr->get_stream()); + + auto stream = pb.handle_ptr->get_stream(); + + if (!clique_data.empty()) { + // Clique-aware path: compute per-group corrections, fold into activity, then update bounds. + constexpr i_t warp = raft::WarpSize; + compute_clique_corrections_kernel + <<>>(make_span(clique_data.group_member_offsets), + make_span(clique_data.group_member_vars), + make_span(clique_data.group_member_coeffs), + make_span(clique_data.group_constraint_ids), + make_span(upd.changed_constraints), + make_span(upd.lb), + make_span(upd.ub), + make_span(upd.group_max_correction), + make_span(upd.group_min_correction), + make_span(upd.group_max_pos), + make_span(upd.group_second_max_pos), + make_span(upd.group_min_neg), + make_span(upd.group_second_min_neg), + pb.tolerances.integrality_tolerance); + RAFT_CHECK_CUDA(stream); + + constexpr i_t apply_tpb = 128; + const i_t apply_blocks = (pb.n_constraints + apply_tpb - 1) / apply_tpb; + apply_clique_corrections_to_activity_kernel + <<>>(make_span(clique_data.constraint_group_offsets), + make_span(upd.group_max_correction), + make_span(upd.group_min_correction), + make_span(upd.changed_constraints), + make_span(upd.min_activity), + make_span(upd.max_activity), + pb.n_constraints); + RAFT_CHECK_CUDA(stream); + + update_bounds_kernel_cliq + <<>>(pb.view(), upd.view(), clique_data.view()); + } else { + update_bounds_kernel + <<>>(pb.view(), upd.view()); + } + RAFT_CHECK_CUDA(stream); + i_t h_bounds_changed = upd.bounds_changed.value(stream); return h_bounds_changed != zero; } @@ -174,6 +248,7 @@ termination_criterion_t bound_presolve_t::bound_update_loop(problem_t< i_t iter; upd.init_changed_constraints(pb.handle_ptr); + ensure_clique_data(pb); for (iter = 0; iter < settings.iteration_limit; ++iter) { calculate_activity(pb); if (timer.check_time_limit()) { diff --git a/cpp/src/mip_heuristics/presolve/bounds_presolve.cuh b/cpp/src/mip_heuristics/presolve/bounds_presolve.cuh index 8b57cc7019..7a0e61f8c2 100644 --- a/cpp/src/mip_heuristics/presolve/bounds_presolve.cuh +++ b/cpp/src/mip_heuristics/presolve/bounds_presolve.cuh @@ -19,6 +19,7 @@ #include #include "bounds_update_data.cuh" +#include "clique_activity_corrections.cuh" #include "utils.cuh" #include @@ -73,8 +74,20 @@ class bound_presolve_t { void copy_input_bounds(problem_t& pb); void calc_and_set_updated_constraint_bounds(problem_t& pb); + // Lazily build (or reuse) the clique group table; idempotent. + void ensure_clique_data(problem_t& pb); + mip_solver_context_t& context; bounds_update_data_t upd; + clique_group_table_t clique_data; + bool clique_data_built{false}; + // Fingerprint detecting problem swaps (local_search alternates between + // *problem_ptr and problem_with_objective_cut, which can share dims). + const problem_t* last_built_problem = nullptr; + const clique_table_t* last_built_clique_table = nullptr; + i_t last_built_n_variables = -1; + i_t last_built_n_constraints = -1; + i_t last_built_nnz = -1; std::vector host_lb; std::vector host_ub; settings_t settings; diff --git a/cpp/src/mip_heuristics/presolve/bounds_update_data.cu b/cpp/src/mip_heuristics/presolve/bounds_update_data.cu index 487549aa4a..2038f70bef 100644 --- a/cpp/src/mip_heuristics/presolve/bounds_update_data.cu +++ b/cpp/src/mip_heuristics/presolve/bounds_update_data.cu @@ -21,7 +21,13 @@ bounds_update_data_t::bounds_update_data_t(problem_t& proble ub(problem.n_variables, problem.handle_ptr->get_stream()), changed_constraints(problem.n_constraints, problem.handle_ptr->get_stream()), next_changed_constraints(problem.n_constraints, problem.handle_ptr->get_stream()), - changed_variables(problem.n_variables, problem.handle_ptr->get_stream()) + changed_variables(problem.n_variables, problem.handle_ptr->get_stream()), + group_max_correction(0, problem.handle_ptr->get_stream()), + group_min_correction(0, problem.handle_ptr->get_stream()), + group_max_pos(0, problem.handle_ptr->get_stream()), + group_second_max_pos(0, problem.handle_ptr->get_stream()), + group_min_neg(0, problem.handle_ptr->get_stream()), + group_second_min_neg(0, problem.handle_ptr->get_stream()) { } @@ -35,6 +41,20 @@ void bounds_update_data_t::resize(problem_t& problem) changed_constraints.resize(problem.n_constraints, problem.handle_ptr->get_stream()); next_changed_constraints.resize(problem.n_constraints, problem.handle_ptr->get_stream()); changed_variables.resize(problem.n_variables, problem.handle_ptr->get_stream()); + // Clique-group buffers are sized by group count, not problem dims; managed + // separately via resize_clique_buffers from ensure_clique_data. +} + +template +void bounds_update_data_t::resize_clique_buffers(i_t n_groups, + rmm::cuda_stream_view stream) +{ + group_max_correction.resize(n_groups, stream); + group_min_correction.resize(n_groups, stream); + group_max_pos.resize(n_groups, stream); + group_second_max_pos.resize(n_groups, stream); + group_min_neg.resize(n_groups, stream); + group_second_min_neg.resize(n_groups, stream); } template @@ -49,6 +69,12 @@ typename bounds_update_data_t::view_t bounds_update_data_t:: v.changed_constraints = make_span(changed_constraints); v.next_changed_constraints = make_span(next_changed_constraints); v.changed_variables = make_span(changed_variables); + v.group_max_correction = make_span(group_max_correction); + v.group_min_correction = make_span(group_min_correction); + v.group_max_pos = make_span(group_max_pos); + v.group_second_max_pos = make_span(group_second_max_pos); + v.group_min_neg = make_span(group_min_neg); + v.group_second_min_neg = make_span(group_second_min_neg); return v; } diff --git a/cpp/src/mip_heuristics/presolve/bounds_update_data.cuh b/cpp/src/mip_heuristics/presolve/bounds_update_data.cuh index e8b5e85864..e4ee82e7af 100644 --- a/cpp/src/mip_heuristics/presolve/bounds_update_data.cuh +++ b/cpp/src/mip_heuristics/presolve/bounds_update_data.cuh @@ -25,6 +25,17 @@ struct bounds_update_data_t { rmm::device_uvector next_changed_constraints; rmm::device_uvector changed_variables; + // Per-group dynamic state for clique-aware activity tightening. Sized to + // clique_group_table_t::n_groups; empty when not enabled. Clean-constraint + // groups retain previous-iteration values (still exact — see + // compute_clique_corrections_kernel header). + rmm::device_uvector group_max_correction; + rmm::device_uvector group_min_correction; + rmm::device_uvector group_max_pos; + rmm::device_uvector group_second_max_pos; + rmm::device_uvector group_min_neg; + rmm::device_uvector group_second_min_neg; + struct view_t { i_t* bounds_changed; raft::device_span min_activity; @@ -34,10 +45,18 @@ struct bounds_update_data_t { raft::device_span changed_constraints; raft::device_span next_changed_constraints; raft::device_span changed_variables; + + raft::device_span group_max_correction; + raft::device_span group_min_correction; + raft::device_span group_max_pos; + raft::device_span group_second_max_pos; + raft::device_span group_min_neg; + raft::device_span group_second_min_neg; }; bounds_update_data_t(problem_t& pb); void resize(problem_t& problem); + void resize_clique_buffers(i_t n_groups, rmm::cuda_stream_view stream); void init_changed_constraints(const raft::handle_t* handle_ptr); void prepare_for_next_iteration(const raft::handle_t* handle_ptr); view_t view(); diff --git a/cpp/src/mip_heuristics/presolve/bounds_update_helpers.cuh b/cpp/src/mip_heuristics/presolve/bounds_update_helpers.cuh index b3f2ac16c0..a20649fbca 100644 --- a/cpp/src/mip_heuristics/presolve/bounds_update_helpers.cuh +++ b/cpp/src/mip_heuristics/presolve/bounds_update_helpers.cuh @@ -9,6 +9,7 @@ #include #include #include "bounds_update_data.cuh" +#include "clique_activity_corrections.cuh" namespace cuopt::linear_programming::detail { @@ -381,4 +382,185 @@ __global__ void update_bounds_kernel(typename problem_t::view_t pb, } } +// Clique-aware variants. When peeling var v off a clique-corrected activity, +// add back the group's contribution "without v": +// max_a += (pos_v_b >= max1_g) ? max2_g : pos_v_b +// where b_v = sign_v * coeff_v is the effective literal coeff and (max1, max2) +// are the group's top-2 in literal space. The subsequent raw-coeff peel is +// unchanged because the activity was built from raw coefficients. +template +inline __device__ thrust::pair update_bounds_per_cnst_cliq( + typename problem_t::view_t pb, + i_t var_idx, + f_t coeff, + i_t cnst_idx, + f_t cnst_lb, + f_t cnst_ub, + typename bounds_update_data_t::view_t upd, + thrust::pair bnd, + thrust::pair old_bnd, + i_t group_id, + i_t member_sign, + typename clique_group_table_t::view_t cliq) +{ + auto min_a = upd.min_activity[cnst_idx]; + auto max_a = upd.max_activity[cnst_idx]; + if (check_infeasibility(min_a, + max_a, + cnst_lb, + cnst_ub, + pb.tolerances.absolute_tolerance, + pb.tolerances.relative_tolerance)) { + return bnd; + } + +#if CUOPT_DEBUG_CLIQUE_TIGHTENING + // Snapshot raw activity (undo apply_clique_corrections) for the stock + // baseline diff print below. + f_t raw_min_a = min_a; + f_t raw_max_a = max_a; + { + i_t g_begin = cliq.constraint_group_offsets[cnst_idx]; + i_t g_end = cliq.constraint_group_offsets[cnst_idx + 1]; + for (i_t g = g_begin; g < g_end; ++g) { + raw_max_a += upd.group_max_correction[g]; + raw_min_a += upd.group_min_correction[g]; + } + } +#endif + + // Per-variable clique adjustment in literal space (b_v = member_sign * coeff) + // before peeling off v's contribution. Top-2 stats are also in literal space. + if (group_id >= 0) { + cuopt_assert(group_id < cliq.n_groups, "clique group id out of range"); + cuopt_assert(member_sign == 1 || member_sign == -1, + "member_sign must be +/-1 when group_id >= 0"); + f_t b_v = static_cast(member_sign) * coeff; + f_t pos_v = (b_v > 0) ? b_v : f_t{0}; + f_t neg_v = (b_v < 0) ? b_v : f_t{0}; + f_t max1 = upd.group_max_pos[group_id]; + f_t max2 = upd.group_second_max_pos[group_id]; + f_t min1 = upd.group_min_neg[group_id]; + f_t min2 = upd.group_second_min_neg[group_id]; + cuopt_assert(max1 >= max2, "top-2 max invariant violated (per-cnst)"); + cuopt_assert(min1 <= min2, "top-2 min invariant violated (per-cnst)"); + f_t max_adj = (pos_v >= max1) ? max2 : pos_v; + f_t min_adj = (neg_v <= min1) ? min2 : neg_v; + max_a += max_adj; + min_a += min_adj; + } else { + cuopt_assert(member_sign == 0, "member_sign must be 0 when group_id == -1"); + } + + min_a -= (coeff < 0) ? coeff * thrust::get<1>(old_bnd) : coeff * thrust::get<0>(old_bnd); + max_a -= (coeff > 0) ? coeff * thrust::get<1>(old_bnd) : coeff * thrust::get<0>(old_bnd); + auto delta_min_act = cnst_ub - min_a; + auto delta_max_act = cnst_lb - max_a; + thrust::get<0>(bnd) = update_lb(thrust::get<0>(bnd), coeff, delta_min_act, delta_max_act); + thrust::get<1>(bnd) = update_ub(thrust::get<1>(bnd), coeff, delta_min_act, delta_max_act); + +#if CUOPT_DEBUG_CLIQUE_TIGHTENING + // Stock vs clique-aware per-cnst (lb, ub) diff print. Tightening typically + // fires on non-member vars, so don't restrict to group_id >= 0. + { + f_t s_min_a = raw_min_a; + f_t s_max_a = raw_max_a; + s_min_a -= (coeff < 0) ? coeff * thrust::get<1>(old_bnd) : coeff * thrust::get<0>(old_bnd); + s_max_a -= (coeff > 0) ? coeff * thrust::get<1>(old_bnd) : coeff * thrust::get<0>(old_bnd); + f_t s_dmin = cnst_ub - s_min_a; + f_t s_dmax = cnst_lb - s_max_a; + f_t s_lb = update_lb(thrust::get<0>(old_bnd), coeff, s_dmin, s_dmax); + f_t s_ub = update_ub(thrust::get<1>(old_bnd), coeff, s_dmin, s_dmax); + f_t c_lb = thrust::get<0>(bnd); + f_t c_ub = thrust::get<1>(bnd); + const f_t eps = (f_t)1e-9; + if ((c_lb > s_lb + eps || c_ub < s_ub - eps) && pb.variable_types[var_idx] == var_t::INTEGER) { + printf( + "[clique-tighten] var=%d cnst=%d group=%d coeff=%.6f | " + "stock per-cnst: lb=%.6f ub=%.6f | cliq per-cnst: lb=%.6f ub=%.6f\n", + (int)var_idx, + (int)cnst_idx, + (int)group_id, + (double)coeff, + (double)s_lb, + (double)s_ub, + (double)c_lb, + (double)c_ub); + } + } +#endif + + return bnd; +} + +template +__device__ void update_bounds_cliq(typename problem_t::view_t pb, + i_t var_idx, + i_t var_offset, + i_t var_degree, + bool is_int, + typename bounds_update_data_t::view_t upd, + thrust::pair old_bnd, + typename clique_group_table_t::view_t cliq) +{ + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + + i_t var_changed = upd.changed_variables[var_idx]; + if (!var_changed) { return; } + + auto bnd = old_bnd; + for (i_t i = threadIdx.x; i < var_degree; i += blockDim.x) { + auto cnst_idx = pb.reverse_constraints[var_offset + i]; + bool changed = upd.changed_constraints[cnst_idx] == 1; + if (!changed) { continue; } + auto a = pb.reverse_coefficients[var_offset + i]; + auto cnst_ub = pb.constraint_upper_bounds[cnst_idx]; + auto cnst_lb = pb.constraint_lower_bounds[cnst_idx]; + // group_id == -1 ⇒ v not in any clique group for this cnst (stock path). + cuopt_assert((i_t)(var_offset + i) < (i_t)cliq.reverse_group_id.size(), + "reverse_group_id index out of range"); + cuopt_assert((i_t)(var_offset + i) < (i_t)cliq.reverse_member_sign.size(), + "reverse_member_sign index out of range"); + i_t group_id = cliq.reverse_group_id[var_offset + i]; + i_t member_sign = cliq.reverse_member_sign[var_offset + i]; + cuopt_assert(group_id == -1 || (group_id >= 0 && group_id < cliq.n_groups), + "reverse_group_id carries invalid group id"); + bnd = update_bounds_per_cnst_cliq( + pb, var_idx, a, cnst_idx, cnst_lb, cnst_ub, upd, bnd, old_bnd, group_id, member_sign, cliq); + } + + thrust::get<0>(bnd) = BlockReduce(temp_storage).Reduce(thrust::get<0>(bnd), cuda::maximum()); + __syncthreads(); + thrust::get<1>(bnd) = BlockReduce(temp_storage).Reduce(thrust::get<1>(bnd), cuda::minimum()); + __shared__ bool changed; + if (threadIdx.x == 0) { changed = write_updated_bounds(pb, var_idx, is_int, upd, bnd, old_bnd); } + __syncthreads(); + for (i_t i = threadIdx.x; i < var_degree; i += blockDim.x) { + auto cnst_idx = pb.reverse_constraints[var_offset + i]; + if (changed) { atomicExch(&upd.next_changed_constraints[cnst_idx], 1); } + } +} + +template +__global__ void update_bounds_kernel_cliq(typename problem_t::view_t pb, + typename bounds_update_data_t::view_t upd, + typename clique_group_table_t::view_t cliq) +{ + i_t var_idx = blockIdx.x; + i_t var_offset = pb.reverse_offsets[var_idx]; + i_t var_degree = pb.reverse_offsets[var_idx + 1] - var_offset; + bool is_int = (pb.variable_types[var_idx] == var_t::INTEGER); + + auto old_bnd = thrust::make_pair(upd.lb[var_idx], upd.ub[var_idx]); + + auto skip = skip_update(old_bnd, pb.tolerances.integrality_tolerance); + if (skip) { + return; + } else { + update_bounds_cliq( + pb, var_idx, var_offset, var_degree, is_int, upd, old_bnd, cliq); + } +} + } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/presolve/clique_activity_corrections.cu b/cpp/src/mip_heuristics/presolve/clique_activity_corrections.cu new file mode 100644 index 0000000000..a22147a62d --- /dev/null +++ b/cpp/src/mip_heuristics/presolve/clique_activity_corrections.cu @@ -0,0 +1,544 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include "clique_activity_corrections.cuh" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +template +typename clique_group_table_t::view_t clique_group_table_t::view() +{ + view_t v; + v.group_constraint_ids = make_span(group_constraint_ids); + v.group_member_offsets = make_span(group_member_offsets); + v.group_member_vars = make_span(group_member_vars); + v.group_member_coeffs = make_span(group_member_coeffs); + v.constraint_group_offsets = make_span(constraint_group_offsets); + v.reverse_group_id = make_span(reverse_group_id); + v.reverse_member_sign = make_span(reverse_member_sign); + v.n_groups = n_groups; + return v; +} + +template +void clique_group_table_t::build_from_host( + problem_t& problem, + const std::vector& primary_reverse_original_ids, + clique_table_t& clique_table) +{ + n_groups = 0; + + const bool has_large_or_addtl = + !clique_table.first.empty() || !clique_table.addtl_cliques.empty(); + const bool has_small_adj = !clique_table.small_clique_adj.indices.empty(); + if (!has_large_or_addtl && !has_small_adj) { + CUOPT_LOG_TRACE("clique_group_table_t::build_from_host: no cliques, skipping"); + return; + } + + const i_t n_vars = problem.n_variables; + const i_t n_constraints = problem.n_constraints; + const i_t nnz = problem.nnz; + cuopt_assert(n_vars > 0, "problem has no variables"); + cuopt_assert(n_constraints >= 0, "n_constraints must be non-negative"); + cuopt_assert(nnz >= 0, "nnz must be non-negative"); + cuopt_assert(clique_table.var_clique_first.n_keys() >= n_vars || + clique_table.var_clique_first.indices.empty(), + "var_clique_first sized inconsistently with problem"); + + // Build clique-build-space → pb-space map. Identity for root / + // +objective-cut; remap via primary_reverse_original_ids[problem.original_ids] + // for fixed sub-problems. -1 means the build-var was fixed/removed. + const i_t n_build_vars = clique_table.n_variables; + const bool ids_match = (n_build_vars == n_vars); + std::vector build_var_to_pb; + if (ids_match) { + build_var_to_pb.resize(n_build_vars); + std::iota(build_var_to_pb.begin(), build_var_to_pb.end(), i_t{0}); + } else { + cuopt_assert( + !primary_reverse_original_ids.empty(), + "clique_table.n_variables differs from problem.n_variables but the caller provided no " + "primary_reverse_original_ids — cannot safely remap clique-build ids to this sub-problem."); + cuopt_assert((i_t)problem.original_ids.size() == n_vars, + "problem.original_ids must be sized to problem.n_variables to run clique-aware " + "propagation on a sub-problem"); + const i_t n_input = static_cast(primary_reverse_original_ids.size()); + build_var_to_pb.assign(n_build_vars, -1); + for (i_t p = 0; p < n_vars; ++p) { + i_t input_idx = problem.original_ids[p]; + if (input_idx < 0 || input_idx >= n_input) continue; + i_t build_idx = primary_reverse_original_ids[input_idx]; + if (build_idx < 0 || build_idx >= n_build_vars) continue; + cuopt_assert(build_var_to_pb[build_idx] == -1, + "Duplicate forward remap entry for a clique-build var"); + build_var_to_pb[build_idx] = p; + } + } + + // Remap literal vertex from clique-build to pb literal space; -1 if removed. + auto remap_vertex_to_pb = [&](i_t vertex) -> i_t { + cuopt_assert(vertex >= 0 && vertex < 2 * n_build_vars, + "vertex out of clique-build literal range"); + const bool neg = (vertex >= n_build_vars); + const i_t build_var = neg ? (vertex - n_build_vars) : vertex; + const i_t pb_var = build_var_to_pb[build_var]; + if (pb_var < 0) return -1; + return neg ? (n_vars + pb_var) : pb_var; + }; + + // --- Pull problem structure to host --------------------------------------- + auto& handle_ptr = problem.handle_ptr; + auto stream = handle_ptr->get_stream(); + + std::vector h_offsets = host_copy(problem.offsets, stream); + std::vector h_variables = host_copy(problem.variables, stream); + std::vector h_coefficients = host_copy(problem.coefficients, stream); + std::vector h_reverse_offsets = host_copy(problem.reverse_offsets, stream); + std::vector h_reverse_constr = host_copy(problem.reverse_constraints, stream); + std::vector h_is_binary = host_copy(problem.is_binary_variable, stream); + + // Materialize explicit (large + addtl) cliques as flat literal-vertex lists. + // Small cliques are handled separately per constraint via the pairwise CSR. + // Storing the effective literal coeff b_j = sign_j * a_j lets the kernel + // treat positive and complement literals uniformly. + std::vector> all_cliques; + all_cliques.reserve(clique_table.first.size() + clique_table.addtl_cliques.size()); + + auto underlying_var = [&](i_t vertex) -> i_t { + return vertex < n_vars ? vertex : (vertex - n_vars); + }; + auto literal_sign = [&](i_t vertex) -> i_t { return vertex < n_vars ? i_t{+1} : i_t{-1}; }; + + // Remap clique to pb literal space; drop members removed since clique-build. + // Cliques with <2 surviving members are dropped (trivially non-tightening). + auto push_remapped_clique = [&](const std::vector& src) { + std::vector dst; + dst.reserve(src.size()); + for (i_t v_build : src) { + i_t v_pb = remap_vertex_to_pb(v_build); + if (v_pb >= 0) dst.push_back(v_pb); + } + if (dst.size() >= 2) all_cliques.push_back(std::move(dst)); + }; + + for (auto const& clique : clique_table.first) { + push_remapped_clique(clique); + } + // Each addtl = {vertex_idx} ∪ first[clique_idx][start_pos_on_clique:] + for (auto const& addtl : clique_table.addtl_cliques) { + auto const& base = clique_table.first[addtl.clique_idx]; + std::vector mat; + mat.reserve(1 + base.size() - addtl.start_pos_on_clique); + mat.push_back(addtl.vertex_idx); + for (i_t i = addtl.start_pos_on_clique; i < (i_t)base.size(); ++i) { + mat.push_back(base[i]); + } + push_remapped_clique(mat); + } + + // Reverse index: underlying var → indices into all_cliques (pb space). + std::vector> var_to_cliques(n_vars); + for (i_t ci = 0; ci < (i_t)all_cliques.size(); ++ci) { + for (i_t vertex : all_cliques[ci]) { + var_to_cliques[underlying_var(vertex)].push_back(ci); + } + } + + // Small-clique adjacency: query the CSR directly when id spaces match, + // otherwise build a pb-space hash shadow (sub-problem path is cold). + std::unordered_map> adj_list_shadow; + if (!ids_match && has_small_adj) { + const auto& sc = clique_table.small_clique_adj; + const i_t sc_keys = sc.n_keys(); + for (i_t u_build = 0; u_build < sc_keys; ++u_build) { + const i_t u_slice = sc.slice_size(u_build); + if (u_slice == 0) continue; + i_t u_pb = remap_vertex_to_pb(u_build); + if (u_pb < 0) continue; + auto& set = adj_list_shadow[u_pb]; + for (const i_t* it = sc.slice_begin(u_build); it != sc.slice_end(u_build); ++it) { + i_t v_pb = remap_vertex_to_pb(*it); + if (v_pb >= 0) set.insert(v_pb); + } + if (set.empty()) adj_list_shadow.erase(u_pb); + } + } + const bool has_small_adj_for_build = ids_match ? has_small_adj : !adj_list_shadow.empty(); + + auto has_small_edge = [&](i_t u, i_t v) -> bool { + if (ids_match) { return clique_table.small_clique_adj.slice_contains(u, v); } + auto it = adj_list_shadow.find(u); + if (it == adj_list_shadow.end()) return false; + return it->second.count(v) > 0; + }; + + // Copy neighbors of seed_lit into `out`; returns false if seed has no + // neighbors (CSR slice for ids_match, unordered_set otherwise). + auto collect_neighbors = [&](i_t seed_lit, std::vector& out) -> bool { + out.clear(); + if (ids_match) { + const auto& sc = clique_table.small_clique_adj; + const i_t k = sc.slice_size(seed_lit); + if (k == 0) return false; + out.assign(sc.slice_begin(seed_lit), sc.slice_end(seed_lit)); + return true; + } + auto it = adj_list_shadow.find(seed_lit); + if (it == adj_list_shadow.end() || it->second.empty()) return false; + out.assign(it->second.begin(), it->second.end()); + return true; + }; + + // For each constraint, greedily partition its binary vars into groups: + // (1) explicit large/addtl cliques first, sorted largest-first; + // (2) then small-adj greedy maximal-clique extraction over unassigned binaries. + // Emit in constraint-id order for deterministic downstream summation. + + struct group_t { + i_t cnst_idx; + std::vector vars; + std::vector coeffs; + std::vector signs; // +/-1, parallel to vars/coeffs + }; + std::vector groups; + groups.reserve(all_cliques.size() * 2); + + i_t phase1_groups = 0; + i_t phase2_groups = 0; + + std::unordered_map var_to_coeff; + std::unordered_set relevant_clique_idx; + std::vector sorted_cliques; + std::unordered_set assigned_vars; + std::vector unassigned_binaries; + + // Emit a group, storing the effective literal coeff sign * a_var. + auto emit_group_from_members = + [&](i_t c, const std::vector& members, const std::vector& signs) -> bool { + if (members.size() < 2) return false; + cuopt_assert(members.size() == signs.size(), "members/signs size mismatch"); + group_t g; + g.cnst_idx = c; + g.vars.reserve(members.size()); + g.coeffs.reserve(members.size()); + g.signs.reserve(members.size()); + for (std::size_t k = 0; k < members.size(); ++k) { + i_t v = members[k]; + i_t s = signs[k]; + g.vars.push_back(v); + g.coeffs.push_back(static_cast(s) * var_to_coeff[v]); + g.signs.push_back(s); + assigned_vars.insert(v); + } + groups.push_back(std::move(g)); + return true; + }; + + for (i_t c = 0; c < n_constraints; ++c) { + var_to_coeff.clear(); + relevant_clique_idx.clear(); + sorted_cliques.clear(); + assigned_vars.clear(); + + i_t row_begin = h_offsets[c]; + i_t row_end = h_offsets[c + 1]; + for (i_t i = row_begin; i < row_end; ++i) { + i_t var = h_variables[i]; + var_to_coeff.emplace(var, h_coefficients[i]); + } + + // (1) Large/addtl cliques touching this constraint, largest-first. + for (auto& [var, coeff] : var_to_coeff) { + if (!h_is_binary[var]) continue; + for (i_t ci : var_to_cliques[var]) + relevant_clique_idx.insert(ci); + } + if (!relevant_clique_idx.empty()) { + sorted_cliques.assign(relevant_clique_idx.begin(), relevant_clique_idx.end()); + std::sort(sorted_cliques.begin(), sorted_cliques.end(), [&](i_t a, i_t b) { + return all_cliques[a].size() > all_cliques[b].size(); + }); + + for (i_t ci : sorted_cliques) { + std::vector members; + std::vector signs; + members.reserve(all_cliques[ci].size()); + signs.reserve(all_cliques[ci].size()); + for (i_t vertex : all_cliques[ci]) { + i_t var = underlying_var(vertex); + i_t sign = literal_sign(vertex); + auto it = var_to_coeff.find(var); + if (it == var_to_coeff.end()) continue; + if (!h_is_binary[var]) continue; + if (assigned_vars.count(var)) continue; + // Reject a clique containing both x and ~x (tautology). + bool already_in = false; + for (i_t uv : members) { + if (uv == var) { + already_in = true; + break; + } + } + if (already_in) continue; + members.push_back(var); + signs.push_back(sign); + } + if (emit_group_from_members(c, members, signs)) ++phase1_groups; + } + } + + // (2) Small-adj greedy maximal-clique extraction over remaining + // unassigned binaries. Nodes are literals; each underlying var appears + // at most once per group. + if (has_small_adj_for_build) { + unassigned_binaries.clear(); + for (auto& [var, _coeff] : var_to_coeff) { + if (h_is_binary[var] && !assigned_vars.count(var)) { unassigned_binaries.push_back(var); } + } + for (i_t seed_var : unassigned_binaries) { + if (assigned_vars.count(seed_var)) continue; + // Try both polarities; the second short-circuits if the first emitted. + for (i_t seed_sign : {i_t{+1}, i_t{-1}}) { + if (assigned_vars.count(seed_var)) break; + i_t seed_lit = (seed_sign == +1) ? seed_var : (n_vars + seed_var); + std::vector seed_neighbors; + if (!collect_neighbors(seed_lit, seed_neighbors)) continue; + + std::vector cand_lits; + for (i_t w_lit : seed_neighbors) { + if (w_lit == seed_lit) continue; + i_t w_var = underlying_var(w_lit); + if (!var_to_coeff.count(w_var)) continue; + if (!h_is_binary[w_var]) continue; + if (assigned_vars.count(w_var)) continue; + if (w_var == seed_var) continue; // reject x / ~x tautology + cand_lits.push_back(w_lit); + } + if (cand_lits.empty()) continue; + + std::vector clique_lits{seed_lit}; + std::vector clique_vars{seed_var}; + std::vector clique_signs{seed_sign}; + for (i_t w_lit : cand_lits) { + i_t w_var = underlying_var(w_lit); + i_t w_sign = literal_sign(w_lit); + bool dup = false; + for (i_t uv : clique_vars) { + if (uv == w_var) { + dup = true; + break; + } + } + if (dup) continue; + bool ok = true; + for (i_t u_lit : clique_lits) { + if (!has_small_edge(u_lit, w_lit)) { + ok = false; + break; + } + } + if (ok) { + clique_lits.push_back(w_lit); + clique_vars.push_back(w_var); + clique_signs.push_back(w_sign); + } + } + if (emit_group_from_members(c, clique_vars, clique_signs)) ++phase2_groups; + } + } + } + } + + n_groups = static_cast(groups.size()); + if (n_groups == 0) { + CUOPT_LOG_TRACE( + "clique_group_table_t::build_from_host: no (cnst, clique) pairs with ≥2 members"); + return; + } + + // Flatten into host CSR arrays. + i_t total_members = 0; + for (auto const& g : groups) + total_members += static_cast(g.vars.size()); + + std::vector h_group_constraint_ids(n_groups); + std::vector h_group_member_offsets(n_groups + 1, 0); + std::vector h_group_member_vars(total_members); + std::vector h_group_member_coeffs(total_members); + + i_t member_cursor = 0; + for (i_t g = 0; g < n_groups; ++g) { + h_group_constraint_ids[g] = groups[g].cnst_idx; + h_group_member_offsets[g] = member_cursor; + for (std::size_t k = 0; k < groups[g].vars.size(); ++k) { + h_group_member_vars[member_cursor] = groups[g].vars[k]; + h_group_member_coeffs[member_cursor] = groups[g].coeffs[k]; + ++member_cursor; + } + } + h_group_member_offsets[n_groups] = total_members; + + // constraint_group_offsets: count + prefix-sum (groups already sorted by c). + std::vector h_constraint_group_offsets(n_constraints + 1, 0); + for (i_t g = 0; g < n_groups; ++g) { + h_constraint_group_offsets[h_group_constraint_ids[g] + 1]++; + } + for (i_t c = 1; c <= n_constraints; ++c) { + h_constraint_group_offsets[c] += h_constraint_group_offsets[c - 1]; + } + + // reverse_group_id / reverse_member_sign: one entry per nnz reverse-CSR slot. + // group_id == -1 for non-members (sign 0); otherwise sign is +/-1. + std::vector h_reverse_group_id(nnz, -1); + std::vector h_reverse_member_sign(nnz, 0); + for (i_t g = 0; g < n_groups; ++g) { + i_t c = h_group_constraint_ids[g]; + for (i_t k = h_group_member_offsets[g]; k < h_group_member_offsets[g + 1]; ++k) { + i_t var = h_group_member_vars[k]; + i_t local = k - h_group_member_offsets[g]; + i_t sign = groups[g].signs[local]; + i_t rv_beg = h_reverse_offsets[var]; + i_t rv_end = h_reverse_offsets[var + 1]; + for (i_t r = rv_beg; r < rv_end; ++r) { + if (h_reverse_constr[r] == c) { + h_reverse_group_id[r] = g; + h_reverse_member_sign[r] = sign; + break; + } + } + } + } + + // Host-side invariant checks. + cuopt_assert((i_t)h_group_constraint_ids.size() == n_groups, "group_constraint_ids bad size"); + cuopt_assert((i_t)h_group_member_offsets.size() == n_groups + 1, "group_member_offsets bad size"); + cuopt_assert((i_t)h_group_member_vars.size() == total_members, "group_member_vars bad size"); + cuopt_assert((i_t)h_group_member_coeffs.size() == total_members, "group_member_coeffs bad size"); + cuopt_assert(h_group_member_offsets.front() == 0, "group_member_offsets[0] != 0"); + cuopt_assert(h_group_member_offsets.back() == total_members, + "group_member_offsets[n_groups] != total_members"); + cuopt_assert((i_t)h_constraint_group_offsets.size() == n_constraints + 1, + "constraint_group_offsets bad size"); + cuopt_assert(h_constraint_group_offsets.front() == 0, "constraint_group_offsets[0] != 0"); + cuopt_assert(h_constraint_group_offsets.back() == n_groups, + "constraint_group_offsets[n_constraints] != n_groups"); + cuopt_assert((i_t)h_reverse_group_id.size() == nnz, "reverse_group_id bad size"); + cuopt_assert((i_t)h_reverse_member_sign.size() == nnz, "reverse_member_sign bad size"); + for (i_t r = 0; r < nnz; ++r) { + if (h_reverse_group_id[r] == -1) { + cuopt_assert(h_reverse_member_sign[r] == 0, + "reverse_member_sign must be 0 when not a member"); + } else { + cuopt_assert(h_reverse_member_sign[r] == 1 || h_reverse_member_sign[r] == -1, + "reverse_member_sign must be +/-1 when a member"); + } + } + for (i_t g = 0; g < n_groups; ++g) { + cuopt_assert(h_group_member_offsets[g] <= h_group_member_offsets[g + 1], + "group_member_offsets not monotonic"); + cuopt_assert(h_group_member_offsets[g + 1] - h_group_member_offsets[g] >= 2, + "each group must have >=2 members"); + cuopt_assert(h_group_constraint_ids[g] >= 0 && h_group_constraint_ids[g] < n_constraints, + "group constraint id out of range"); + for (i_t k = h_group_member_offsets[g]; k < h_group_member_offsets[g + 1]; ++k) { + cuopt_assert(h_group_member_vars[k] >= 0 && h_group_member_vars[k] < n_vars, + "group member var out of range"); + } + } + for (i_t g = 1; g < n_groups; ++g) { + cuopt_assert(h_group_constraint_ids[g - 1] <= h_group_constraint_ids[g], + "groups not sorted by constraint id"); + } + + group_constraint_ids = device_copy(h_group_constraint_ids, stream); + group_member_offsets = device_copy(h_group_member_offsets, stream); + group_member_vars = device_copy(h_group_member_vars, stream); + group_member_coeffs = device_copy(h_group_member_coeffs, stream); + constraint_group_offsets = device_copy(h_constraint_group_offsets, stream); + reverse_group_id = device_copy(h_reverse_group_id, stream); + reverse_member_sign = device_copy(h_reverse_member_sign, stream); + + handle_ptr->sync_stream(); + + // Summary stats. + { + i_t min_size = std::numeric_limits::max(); + i_t max_size = 0; + i_t size_eq2 = 0, size_3_5 = 0, size_6_20 = 0, size_ge21 = 0; + for (i_t g = 0; g < n_groups; ++g) { + i_t sz = h_group_member_offsets[g + 1] - h_group_member_offsets[g]; + min_size = std::min(min_size, sz); + max_size = std::max(max_size, sz); + if (sz == 2) + ++size_eq2; + else if (sz <= 5) + ++size_3_5; + else if (sz <= 20) + ++size_6_20; + else + ++size_ge21; + } + const double avg_size = (double)total_members / (double)n_groups; + + i_t constraints_with_groups = 0; + i_t max_groups_per_cnst = 0; + for (i_t c = 0; c < n_constraints; ++c) { + i_t ng = h_constraint_group_offsets[c + 1] - h_constraint_group_offsets[c]; + if (ng > 0) ++constraints_with_groups; + max_groups_per_cnst = std::max(max_groups_per_cnst, ng); + } + + CUOPT_LOG_INFO( + "clique_group_table_t::build_from_host: n_groups=%d total_members=%d avg_size=%.2f " + "(min=%d max=%d) | size buckets: =2:%d, 3-5:%d, 6-20:%d, >=21:%d | " + "constraints_with_groups=%d/%d max_groups/cnst=%d | " + "phase1 (explicit cliques)=%d phase2 (small-adj)=%d", + n_groups, + total_members, + avg_size, + (n_groups > 0 ? min_size : 0), + max_size, + size_eq2, + size_3_5, + size_6_20, + size_ge21, + constraints_with_groups, + n_constraints, + max_groups_per_cnst, + phase1_groups, + phase2_groups); + CUOPT_LOG_INFO( + "clique_group_table_t::build_from_host: sources: large=%zu, addtl=%zu, " + "materialized=%zu, small_adj_edges=%zu", + clique_table.first.size(), + clique_table.addtl_cliques.size(), + all_cliques.size(), + clique_table.small_clique_adj.indices.size()); + } +} + +#if MIP_INSTANTIATE_FLOAT +template class clique_group_table_t; +#endif +#if MIP_INSTANTIATE_DOUBLE +template class clique_group_table_t; +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/presolve/clique_activity_corrections.cuh b/cpp/src/mip_heuristics/presolve/clique_activity_corrections.cuh new file mode 100644 index 0000000000..0d5e162587 --- /dev/null +++ b/cpp/src/mip_heuristics/presolve/clique_activity_corrections.cuh @@ -0,0 +1,268 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +// Set to 1 to printf when clique-aware activity tightening fires. +#ifndef CUOPT_DEBUG_CLIQUE_TIGHTENING +#define CUOPT_DEBUG_CLIQUE_TIGHTENING 0 +#endif + +namespace cuopt::linear_programming::detail { + +// Static CSR of non-overlapping clique groups per constraint. Built once on the +// host from clique_table_t and copied to device. Groups are sorted by +// constraint_id for deterministic per-constraint summation. Dynamic correction +// values live on bounds_update_data_t (per-probe lifetime, see +// resize_clique_buffers). +template +struct clique_group_table_t { + struct view_t { + raft::device_span group_constraint_ids; + raft::device_span group_member_offsets; + raft::device_span group_member_vars; + raft::device_span group_member_coeffs; + raft::device_span constraint_group_offsets; + raft::device_span reverse_group_id; + // Parallel to reverse_group_id: literal sign per (var, cnst) slot. + // +1 / -1 when group member, 0 otherwise. Top-2 stats live on the + // effective literal coeff b_j = sign_j * a_j, so the per-var adjustment + // needs the sign separately from the raw coeff. + raft::device_span reverse_member_sign; + i_t n_groups; + }; + + explicit clique_group_table_t(rmm::cuda_stream_view stream) + : group_constraint_ids(0, stream), + group_member_offsets(0, stream), + group_member_vars(0, stream), + group_member_coeffs(0, stream), + constraint_group_offsets(0, stream), + reverse_group_id(0, stream), + reverse_member_sign(0, stream) + { + } + + // Greedy non-overlapping partition of each constraint's binary members into + // clique groups: explicit large/addtl cliques first (largest first), then + // remaining unassigned binaries against small_clique_adj. Members store the + // effective literal coeff b_j = sign_j * a_j so the kernel handles positive + // and complement literals uniformly. `primary_reverse_original_ids` is the + // primary problem's N→M map for the sub-problem path; empty otherwise. + void build_from_host(problem_t& problem, + const std::vector& primary_reverse_original_ids, + clique_table_t& clique_table); + + view_t view(); + + bool empty() const noexcept { return n_groups == 0; } + + // Static (built once) + rmm::device_uvector group_constraint_ids; + rmm::device_uvector group_member_offsets; + rmm::device_uvector group_member_vars; + rmm::device_uvector group_member_coeffs; + rmm::device_uvector constraint_group_offsets; + rmm::device_uvector reverse_group_id; + rmm::device_uvector reverse_member_sign; + + i_t n_groups{0}; +}; + +// One warp per group. Each thread strides over members maintaining (best, +// second), then a butterfly warp reduction merges sums and top-2 pairs. +// Lane 0 writes the results. No atomics, deterministic. +// +// Skip when changed_constraints[c] == 0: the previously written corrections +// are still exact (no member bound changed). Both downstream consumers +// (apply_clique_corrections_to_activity_kernel, update_bounds_per_cnst_cliq) +// gate on the same flag, so unread slots are never observed stale. +template +__global__ void compute_clique_corrections_kernel(raft::device_span group_member_offsets, + raft::device_span group_member_vars, + raft::device_span group_member_coeffs, + raft::device_span group_constraint_ids, + raft::device_span changed_constraints, + raft::device_span lb, + raft::device_span ub, + raft::device_span group_max_correction, + raft::device_span group_min_correction, + raft::device_span group_max_pos, + raft::device_span group_second_max_pos, + raft::device_span group_min_neg, + raft::device_span group_second_min_neg, + f_t int_tol) +{ + static_assert(TPB == raft::WarpSize, + "compute_clique_corrections_kernel requires exactly one warp per block"); + + const i_t gid = blockIdx.x; + cuopt_assert(gid + 1 < (i_t)group_member_offsets.size(), "group id out of range"); + + // Skip clean constraint (uniform branch across the warp). + cuopt_assert(gid < (i_t)group_constraint_ids.size(), "group_constraint_ids index out of range"); + const i_t cnst = group_constraint_ids[gid]; + cuopt_assert(cnst >= 0 && cnst < (i_t)changed_constraints.size(), + "group constraint id out of range"); + if (changed_constraints[cnst] == 0) return; + + const i_t mem_begin = group_member_offsets[gid]; + const i_t mem_end = group_member_offsets[gid + 1]; + cuopt_assert(mem_begin <= mem_end, "group member offsets not monotonic"); + cuopt_assert(mem_end <= (i_t)group_member_vars.size(), "group member offsets exceed vars array"); + cuopt_assert(group_member_vars.size() == group_member_coeffs.size(), + "group member vars/coeffs size mismatch"); + + f_t sum_pos = 0, sum_neg = 0; + f_t max1 = 0, max2 = 0; // top-2 of max(0, coeff) (max1 >= max2) + f_t min1 = 0, min2 = 0; // top-2 of min(0, coeff) (min1 <= min2) + i_t n_unfixed = 0; + + for (i_t m = mem_begin + threadIdx.x; m < mem_end; m += TPB) { + i_t var = group_member_vars[m]; + f_t a = group_member_coeffs[m]; + cuopt_assert(var >= 0 && var < (i_t)lb.size(), "clique member var index out of range"); + if (ub[var] - lb[var] <= int_tol) continue; // fixed → skip + + n_unfixed++; + f_t pos = fmax(a, f_t{0}); + f_t neg = fmin(a, f_t{0}); + sum_pos += pos; + sum_neg += neg; + + if (pos > max1) { + max2 = max1; + max1 = pos; + } else if (pos > max2) { + max2 = pos; + } + if (neg < min1) { + min2 = min1; + min1 = neg; + } else if (neg < min2) { + min2 = neg; + } + } + + // Butterfly warp reduction; top-2 merge formula for sorted pairs (a1>=a2), + // (b1>=b2): new1 = max(a1,b1), new2 = max(min(a1,b1), max(a2,b2)). +#pragma unroll + for (int off = TPB / 2; off > 0; off >>= 1) { + sum_pos += __shfl_xor_sync(0xffffffff, sum_pos, off); + sum_neg += __shfl_xor_sync(0xffffffff, sum_neg, off); + n_unfixed += __shfl_xor_sync(0xffffffff, n_unfixed, off); + + f_t b1 = __shfl_xor_sync(0xffffffff, max1, off); + f_t b2 = __shfl_xor_sync(0xffffffff, max2, off); + f_t new_max1 = fmax(max1, b1); + f_t new_max2 = fmax(fmin(max1, b1), fmax(max2, b2)); + max1 = new_max1; + max2 = new_max2; + + f_t d1 = __shfl_xor_sync(0xffffffff, min1, off); + f_t d2 = __shfl_xor_sync(0xffffffff, min2, off); + f_t new_min1 = fmin(min1, d1); + f_t new_min2 = fmin(fmax(min1, d1), fmin(min2, d2)); + min1 = new_min1; + min2 = new_min2; + } + + if (threadIdx.x == 0) { + if (n_unfixed < 2) { + group_max_pos[gid] = 0; + group_second_max_pos[gid] = 0; + group_min_neg[gid] = 0; + group_second_min_neg[gid] = 0; + group_max_correction[gid] = 0; + group_min_correction[gid] = 0; + return; + } + cuopt_assert(max1 >= max2, "top-2 max invariant violated"); + cuopt_assert(min1 <= min2, "top-2 min invariant violated"); + cuopt_assert(sum_pos >= max1, "sum_pos < max1 is impossible"); + cuopt_assert(sum_neg <= min1, "sum_neg > min1 is impossible"); + group_max_pos[gid] = max1; + group_second_max_pos[gid] = max2; + group_min_neg[gid] = min1; + group_second_min_neg[gid] = min2; + f_t max_corr = sum_pos - max1; // >= 0 + f_t min_corr = sum_neg - min1; // <= 0 + group_max_correction[gid] = max_corr; + group_min_correction[gid] = min_corr; +#if CUOPT_DEBUG_CLIQUE_TIGHTENING + if (max_corr > f_t{0} || min_corr < f_t{0}) { + printf( + "[clique-corr] gid=%d n_unfixed=%d sum_pos=%.6f max1=%.6f max_corr=%.6f | " + "sum_neg=%.6f min1=%.6f min_corr=%.6f\n", + (int)gid, + (int)n_unfixed, + (double)sum_pos, + (double)max1, + (double)max_corr, + (double)sum_neg, + (double)min1, + (double)min_corr); + } +#endif + } +} + +// One thread per constraint, sums its groups' corrections in fixed order. +// MUST gate on the same changed_constraints flag as calc_activity_kernel: +// calc_activity_kernel skips clean constraints, leaving min/max_activity at +// their previous (already clique-corrected) values. Re-subtracting here would +// double-correct and compound across iterations. +template +__global__ void apply_clique_corrections_to_activity_kernel( + raft::device_span constraint_group_offsets, + raft::device_span group_max_correction, + raft::device_span group_min_correction, + raft::device_span changed_constraints, + raft::device_span min_activity, + raft::device_span max_activity, + i_t n_constraints) +{ + i_t c = blockIdx.x * blockDim.x + threadIdx.x; + if (c >= n_constraints) return; + cuopt_assert(c + 1 < (i_t)constraint_group_offsets.size(), + "constraint id out of range for group offsets"); + // Must match calc_activity_kernel's gate exactly. + if (changed_constraints[c] == 0) return; + + i_t g_begin = constraint_group_offsets[c]; + i_t g_end = constraint_group_offsets[c + 1]; + cuopt_assert(g_begin <= g_end, "constraint group offsets not monotonic"); + cuopt_assert(g_end <= (i_t)group_max_correction.size(), + "constraint group offsets exceed group array"); + if (g_begin == g_end) return; + + f_t max_corr = 0, min_corr = 0; + for (i_t g = g_begin; g < g_end; ++g) { + cuopt_assert(group_max_correction[g] >= 0, "max correction must be non-negative"); + cuopt_assert(group_min_correction[g] <= 0, "min correction must be non-positive"); + max_corr += group_max_correction[g]; + min_corr += group_min_correction[g]; + } + max_activity[c] -= max_corr; + min_activity[c] -= min_corr; +} + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cu b/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cu index 82462c11ce..deca5a46c3 100644 --- a/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cu +++ b/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cu @@ -20,6 +20,7 @@ #include "clique_table.cuh" #include +#include #include #include #include @@ -100,7 +101,6 @@ template void make_coeff_positive_knapsack_constraint( const dual_simplex::user_problem_t& problem, std::vector>& knapsack_constraints, - std::unordered_set& set_packing_constraints, typename mip_solver_settings_t::tolerances_t tolerances) { for (i_t i = 0; i < (i_t)knapsack_constraints.size(); i++) { @@ -125,7 +125,6 @@ void make_coeff_positive_knapsack_constraint( } knapsack_constraint.is_set_packing = all_coeff_are_equal; if (!all_coeff_are_equal) { knapsack_constraint.is_set_partitioning = false; } - if (knapsack_constraint.is_set_packing) { set_packing_constraints.insert(i); } cuopt_assert(knapsack_constraint.rhs >= 0, "RHS must be non-negative"); } } @@ -185,9 +184,8 @@ void fill_knapsack_constraints(const dual_simplex::user_problem_t& pro } // equality part else { - // For equality rows, partitioning status should not depend on raw rhs scale here. - // The exact set-packing/partitioning check is finalized later in - // make_coeff_positive_knapsack_constraint after coefficient normalization. + // Final partitioning check is done after coefficient normalization in + // make_coeff_positive_knapsack_constraint. bool is_set_partitioning = true; bool ranged_constraint = ranged_constraint_counter < problem.num_range_rows && problem.range_rows[ranged_constraint_counter] == i; @@ -203,8 +201,7 @@ void fill_knapsack_constraints(const dual_simplex::user_problem_t& pro } // greater than part: convert it to less than knapsack_constraint_t knapsack_constraint2; - // Mark synthetic rows from equality splitting with negative ids so they never alias real row - // indices (including rows appended later by clique extension). + // Negative ids prevent aliasing with real row indices. knapsack_constraint2.cstr_idx = -(added_constraints + 1); added_constraints++; knapsack_constraint2.rhs = -problem.rhs[i]; @@ -228,62 +225,51 @@ void remove_small_cliques(clique_table_t& clique_table, cuopt::timer_t i_t num_removed_first = 0; i_t num_removed_addtl = 0; std::vector to_delete(clique_table.first.size(), false); - // if a clique is small, we remove it from the cliques and add it to adjlist + std::vector> small_edges; + + // Demote sub-threshold first-cliques into pairwise edges. for (size_t clique_idx = 0; clique_idx < clique_table.first.size(); clique_idx++) { if (timer.check_time_limit()) { return; } const auto& clique = clique_table.first[clique_idx]; - if (clique.size() <= (size_t)clique_table.min_clique_size) { + if (clique.size() < (size_t)clique_table.min_clique_size) { for (size_t i = 0; i < clique.size(); i++) { for (size_t j = 0; j < clique.size(); j++) { if (i == j) { continue; } - clique_table.adj_list_small_cliques[clique[i]].insert(clique[j]); + small_edges.emplace_back(clique[i], clique[j]); } } num_removed_first++; to_delete[clique_idx] = true; } } + std::vector addtl_to_delete(clique_table.addtl_cliques.size(), false); for (size_t addtl_c = 0; addtl_c < clique_table.addtl_cliques.size(); addtl_c++) { const auto& addtl_clique = clique_table.addtl_cliques[addtl_c]; const auto base_clique_idx = static_cast(addtl_clique.clique_idx); cuopt_assert(base_clique_idx < to_delete.size(), "Additional clique points to invalid base clique index"); - // Remove additional cliques whose base clique is scheduled for deletion. - if (to_delete[base_clique_idx]) { - // Materialize conflicts represented by: - // addtl_clique.vertex_idx + first[base_clique_idx][start_pos_on_clique:] - // before deleting both the additional and base clique entries. - for (size_t i = addtl_clique.start_pos_on_clique; - i < clique_table.first[base_clique_idx].size(); - i++) { - clique_table.adj_list_small_cliques[clique_table.first[base_clique_idx][i]].insert( - addtl_clique.vertex_idx); - clique_table.adj_list_small_cliques[addtl_clique.vertex_idx].insert( - clique_table.first[base_clique_idx][i]); - } - clique_table.addtl_cliques.erase(clique_table.addtl_cliques.begin() + addtl_c); - addtl_c--; - num_removed_addtl++; - continue; - } - i_t size_of_clique = + const bool drop_because_base = to_delete[base_clique_idx]; + const i_t extended_size = clique_table.first[base_clique_idx].size() - addtl_clique.start_pos_on_clique + 1; - if (size_of_clique < clique_table.min_clique_size) { - // the items from first clique are already added to the adjlist - // only add the items that are coming from the new var in the additional clique - for (size_t i = addtl_clique.start_pos_on_clique; - i < clique_table.first[base_clique_idx].size(); - i++) { - // insert conflicts both way - clique_table.adj_list_small_cliques[clique_table.first[base_clique_idx][i]].insert( - addtl_clique.vertex_idx); - clique_table.adj_list_small_cliques[addtl_clique.vertex_idx].insert( - clique_table.first[base_clique_idx][i]); - } - clique_table.addtl_cliques.erase(clique_table.addtl_cliques.begin() + addtl_c); - addtl_c--; - num_removed_addtl++; + const bool drop_because_small = extended_size < clique_table.min_clique_size; + if (!drop_because_base && !drop_because_small) { continue; } + + for (size_t i = addtl_clique.start_pos_on_clique; + i < clique_table.first[base_clique_idx].size(); + i++) { + const i_t base_member = clique_table.first[base_clique_idx][i]; + small_edges.emplace_back(base_member, addtl_clique.vertex_idx); + small_edges.emplace_back(addtl_clique.vertex_idx, base_member); } + addtl_to_delete[addtl_c] = true; + num_removed_addtl++; + } + { + size_t old_addtl_idx = 0; + auto addtl_it = std::remove_if(clique_table.addtl_cliques.begin(), + clique_table.addtl_cliques.end(), + [&](const auto&) { return addtl_to_delete[old_addtl_idx++]; }); + clique_table.addtl_cliques.erase(addtl_it, clique_table.addtl_cliques.end()); } CUOPT_LOG_DEBUG("Number of removed cliques from first: %d, additional: %d", num_removed_first, @@ -312,40 +298,46 @@ void remove_small_cliques(clique_table_t& clique_table, cuopt::timer_t (size_t)clique_table.min_clique_size, "A small clique remained after removing small cliques"); } - // Clique removals/edge materialization can change degrees; force recompute on next query. + clique_table.small_clique_adj.finalize_from_unsorted_pairs(2 * clique_table.n_variables, + small_edges); + // Force degree recompute after structural changes. std::fill(clique_table.var_degrees.begin(), clique_table.var_degrees.end(), -1); } template -std::unordered_set clique_table_t::get_adj_set_of_var(i_t var_idx) +std::unordered_set clique_table_t::get_adj_set_of_var(i_t var_idx) const { std::unordered_set adj_set; - for (const auto& clique_idx : var_clique_map_first[var_idx]) { - adj_set.insert(first[clique_idx].begin(), first[clique_idx].end()); - } - for (const auto& addtl_clique_idx : var_clique_map_addtl[var_idx]) { - adj_set.insert(addtl_cliques[addtl_clique_idx].vertex_idx); - adj_set.insert(first[addtl_cliques[addtl_clique_idx].clique_idx].begin() + - addtl_cliques[addtl_clique_idx].start_pos_on_clique, - first[addtl_cliques[addtl_clique_idx].clique_idx].end()); - } - // Reverse lookup for additional cliques using position map: - // if var_idx is in first[clique_idx][start_pos_on_clique:], it is adjacent to vertex_idx. - for (const auto& addtl : addtl_cliques) { - if (addtl.vertex_idx == var_idx) { continue; } - if (static_cast(addtl.clique_idx) < first_var_positions.size()) { - const auto& pos_map = first_var_positions[addtl.clique_idx]; - auto it = pos_map.find(var_idx); - if (it != pos_map.end() && it->second >= addtl.start_pos_on_clique) { - adj_set.insert(addtl.vertex_idx); - } + // First-clique edges: every member of each first-clique containing var_idx. + for (const i_t* it = var_clique_first.slice_begin(var_idx); + it != var_clique_first.slice_end(var_idx); + ++it) { + const auto& c = first[*it]; + adj_set.insert(c.begin(), c.end()); + } + + // Addtl-clique edges. + for (const i_t* it = var_clique_addtl.slice_begin(var_idx); + it != var_clique_addtl.slice_end(var_idx); + ++it) { + const auto& a = addtl_cliques[*it]; + if (a.vertex_idx == var_idx) { + // var_idx is the extension vertex; new neighbors are the base suffix. + const auto& base = first[a.clique_idx]; + adj_set.insert(base.begin() + a.start_pos_on_clique, base.end()); + } else { + // var_idx is a base member; only new edge is to the extension vertex. + adj_set.insert(a.vertex_idx); } } - for (const auto& adj_vertex : adj_list_small_cliques[var_idx]) { - adj_set.insert(adj_vertex); + for (const i_t* it = small_clique_adj.slice_begin(var_idx); + it != small_clique_adj.slice_end(var_idx); + ++it) { + adj_set.insert(*it); } + // Add the complement of var_idx to the adjacency set i_t complement_idx = (var_idx >= n_variables) ? (var_idx - n_variables) : (var_idx + n_variables); adj_set.insert(complement_idx); @@ -362,99 +354,58 @@ i_t clique_table_t::get_degree_of_var(i_t var_idx) } template -bool clique_table_t::check_adjacency(i_t var_idx1, i_t var_idx2) +bool clique_table_t::check_adjacency(i_t var_idx1, i_t var_idx2) const { if (var_idx1 == var_idx2) { return false; } if (var_idx1 % n_variables == var_idx2 % n_variables) { return true; } - { - auto it = adj_list_small_cliques.find(var_idx1); - if (it != adj_list_small_cliques.end() && it->second.count(var_idx2) > 0) { return true; } - } + // small_clique_adj is symmetric, so probe either direction. + if (small_clique_adj.slice_contains(var_idx1, var_idx2)) { return true; } - // Iterate whichever variable belongs to fewer first-cliques + // Probe through the var with the smaller var_clique_first slice. { i_t probe_var = var_idx1; i_t target_var = var_idx2; - if (var_clique_map_first[var_idx1].size() > var_clique_map_first[var_idx2].size()) { + if (var_clique_first.slice_size(var_idx1) > var_clique_first.slice_size(var_idx2)) { probe_var = var_idx2; target_var = var_idx1; } - for (const auto& clique_idx : var_clique_map_first[probe_var]) { - if (first_var_positions[clique_idx].count(target_var) > 0) { return true; } + for (const i_t* it = var_clique_first.slice_begin(probe_var); + it != var_clique_first.slice_end(probe_var); + ++it) { + if (first_var_positions[*it].count(target_var) > 0) { return true; } } } - for (const auto& addtl_idx : var_clique_map_addtl[var_idx1]) { - const auto& addtl = addtl_cliques[addtl_idx]; + for (const i_t* it = var_clique_addtl.slice_begin(var_idx1); + it != var_clique_addtl.slice_end(var_idx1); + ++it) { + const auto& addtl = addtl_cliques[*it]; const auto& pos_map = first_var_positions[addtl.clique_idx]; - auto it = pos_map.find(var_idx2); - if (it != pos_map.end() && it->second >= addtl.start_pos_on_clique) { return true; } + auto pos_it = pos_map.find(var_idx2); + if (pos_it != pos_map.end() && pos_it->second >= addtl.start_pos_on_clique) { return true; } } - - for (const auto& addtl_idx : var_clique_map_addtl[var_idx2]) { - const auto& addtl = addtl_cliques[addtl_idx]; + for (const i_t* it = var_clique_addtl.slice_begin(var_idx2); + it != var_clique_addtl.slice_end(var_idx2); + ++it) { + const auto& addtl = addtl_cliques[*it]; const auto& pos_map = first_var_positions[addtl.clique_idx]; - auto it = pos_map.find(var_idx1); - if (it != pos_map.end() && it->second >= addtl.start_pos_on_clique) { return true; } + auto pos_it = pos_map.find(var_idx1); + if (pos_it != pos_map.end() && pos_it->second >= addtl.start_pos_on_clique) { return true; } } return false; } -// this function should only be called within extend clique -// if this is called outside extend clique, csr matrix should be converted into csc and copied into -// problem because the problem is partly modified -template -void insert_clique_into_problem(const std::vector& clique, - dual_simplex::user_problem_t& problem, - dual_simplex::csr_matrix_t& A, - f_t coeff_scale) -{ - // convert vertices into original vars - f_t rhs_offset = 0.; - std::vector new_vars; - std::vector new_coeffs; - for (size_t i = 0; i < clique.size(); i++) { - f_t coeff = coeff_scale; - i_t var_idx = clique[i]; - if (var_idx >= problem.num_cols) { - coeff = -coeff_scale; - var_idx = var_idx - problem.num_cols; - rhs_offset += coeff_scale; - } - new_vars.push_back(var_idx); - new_coeffs.push_back(coeff); - } - // coeff_scale * (1 - x) = coeff_scale - coeff_scale * x - // Move constants to the right, so rhs must decrease by rhs_offset. - f_t rhs = coeff_scale - rhs_offset; - // insert the new clique into the problem as a new constraint - dual_simplex::sparse_vector_t new_row(A.n, new_vars.size()); - new_row.i = std::move(new_vars); - new_row.x = std::move(new_coeffs); - A.append_row(new_row); - problem.row_sense.push_back('L'); - problem.rhs.push_back(rhs); - problem.row_names.push_back("Clique" + std::to_string(problem.row_names.size())); -} - +// Returns true on success; `work_out` accumulates scan/hash ops as a +// near-uniform wall-time proxy. template bool extend_clique(const std::vector& clique, clique_table_t& clique_table, - dual_simplex::user_problem_t& problem, - dual_simplex::csr_matrix_t& A, - f_t coeff_scale, - bool modify_problem, - i_t min_extension_gain, - i_t remaining_rows_budget, - i_t remaining_nnz_budget, - i_t& inserted_row_nnz) + double& work_out) { - inserted_row_nnz = 0; i_t smallest_degree = std::numeric_limits::max(); i_t smallest_degree_var = -1; - // find smallest degree vertex in the current set packing constraint for (size_t idx = 0; idx < clique.size(); idx++) { i_t var_idx = clique[idx]; i_t degree = clique_table.get_degree_of_var(var_idx); @@ -463,108 +414,58 @@ bool extend_clique(const std::vector& clique, smallest_degree_var = var_idx; } } - std::vector extension_candidates; + work_out += static_cast(clique.size()); + auto smallest_degree_adj_set = clique_table.get_adj_set_of_var(smallest_degree_var); + const double D = static_cast(smallest_degree_adj_set.size()); + work_out += D; + std::unordered_set clique_members(clique.begin(), clique.end()); + work_out += static_cast(clique.size()); + + std::vector extension_candidates; + extension_candidates.reserve(smallest_degree_adj_set.size()); for (const auto& candidate : smallest_degree_adj_set) { if (clique_members.find(candidate) == clique_members.end()) { extension_candidates.push_back(candidate); } } + work_out += D; + std::sort(extension_candidates.begin(), extension_candidates.end(), [&](i_t a, i_t b) { return clique_table.get_degree_of_var(a) > clique_table.get_degree_of_var(b); }); - auto new_clique = clique; - i_t n_of_complement_conflicts = 0; - i_t complement_conflict_var = -1; + const double C = static_cast(extension_candidates.size()); + if (C > 1.0) { work_out += C * std::log2(C); } + + auto new_clique = clique; for (size_t idx = 0; idx < extension_candidates.size(); idx++) { - i_t var_idx = extension_candidates[idx]; - bool add = true; - bool complement_conflict = false; - i_t complement_conflict_idx = -1; + i_t var_idx = extension_candidates[idx]; + bool add = true; for (size_t i = 0; i < new_clique.size(); i++) { - if (var_idx % clique_table.n_variables == new_clique[i] % clique_table.n_variables) { - complement_conflict = true; - complement_conflict_idx = var_idx % clique_table.n_variables; - } - // check if the tested variable conflicts with all vars in the new clique + work_out += 1.0; if (!clique_table.check_adjacency(var_idx, new_clique[i])) { add = false; break; } } - if (add) { - new_clique.push_back(var_idx); - if (complement_conflict) { - n_of_complement_conflicts++; - complement_conflict_var = complement_conflict_idx; - } - } + if (add) { new_clique.push_back(var_idx); } } - // if we found a larger cliqe, insert it into the formulation + if (new_clique.size() > clique.size()) { - if (n_of_complement_conflicts > 0) { - CUOPT_LOG_DEBUG("Found %d complement conflicts on var %d", - n_of_complement_conflicts, - complement_conflict_var); - cuopt_assert(n_of_complement_conflicts == 1, "There can only be one complement conflict"); - // Keep the discovered extension in the clique table for downstream dominance checks. - clique_table.first.push_back(new_clique); - for (const auto& var_idx : new_clique) { - clique_table.var_degrees[var_idx] = -1; - } - if (modify_problem) { - // fix all other variables other than complementing var - for (size_t i = 0; i < new_clique.size(); i++) { - if (new_clique[i] % clique_table.n_variables != complement_conflict_var) { - CUOPT_LOG_DEBUG("Fixing variable %d", new_clique[i]); - if (new_clique[i] >= problem.num_cols) { - cuopt_assert(problem.lower[new_clique[i] - problem.num_cols] != 0 || - problem.upper[new_clique[i] - problem.num_cols] != 0, - "Variable is fixed to other side"); - problem.lower[new_clique[i] - problem.num_cols] = 1; - problem.upper[new_clique[i] - problem.num_cols] = 1; - } else { - cuopt_assert(problem.lower[new_clique[i]] != 1 || problem.upper[new_clique[i]] != 1, - "Variable is fixed to other side"); - problem.lower[new_clique[i]] = 0; - problem.upper[new_clique[i]] = 0; - } - } - } - } - return true; - } else { - // Keep the discovered extension in the clique table even when row insertion is skipped by - // row/nnz budgets. - clique_table.first.push_back(new_clique); - for (const auto& var_idx : new_clique) { - clique_table.var_degrees[var_idx] = -1; - } + clique_table.first.push_back(new_clique); + for (const auto& var_idx : new_clique) { + clique_table.var_degrees[var_idx] = -1; + } + work_out += static_cast(new_clique.size()); #if DEBUG_KNAPSACK_CONSTRAINTS - CUOPT_LOG_DEBUG("Extended clique: %lu from %lu", new_clique.size(), clique.size()); + CUOPT_LOG_DEBUG("Extended clique: %lu from %lu", new_clique.size(), clique.size()); #endif - i_t extension_gain = static_cast(new_clique.size() - clique.size()); - if (extension_gain < min_extension_gain) { return true; } - if (remaining_rows_budget <= 0 || - remaining_nnz_budget < static_cast(new_clique.size())) { - return true; - } - // Row insertion is now deferred until dominance is confirmed against model rows. - // This keeps extension and replacement sequential: detect dominance first, then replace. - inserted_row_nnz = 0; - } + return true; } - return new_clique.size() > clique.size(); + return false; } -template -struct clique_sig_t { - i_t knapsack_idx; - i_t size; - long long signature; -}; - template struct extension_candidate_t { i_t knapsack_idx; @@ -572,19 +473,6 @@ struct extension_candidate_t { i_t clique_size; }; -template -bool compare_clique_sig(const clique_sig_t& a, const clique_sig_t& b) -{ - if (a.signature != b.signature) { return a.signature < b.signature; } - return a.size < b.size; -} - -template -bool compare_signature_value(long long value, const clique_sig_t& a) -{ - return value < a.signature; -} - template bool compare_extension_candidate(const extension_candidate_t& a, const extension_candidate_t& b) @@ -594,265 +482,28 @@ bool compare_extension_candidate(const extension_candidate_t& a, return a.knapsack_idx < b.knapsack_idx; } -template -bool is_sorted_subset(const std::vector& a, const std::vector& b) -{ - size_t i = 0; - size_t j = 0; - while (i < a.size() && j < b.size()) { - if (a[i] == b[j]) { - i++; - j++; - } else if (a[i] > b[j]) { - j++; - } else { - return false; - } - } - return i == a.size(); -} - -template -void fix_difference(const std::vector& superset, - const std::vector& subset, - dual_simplex::user_problem_t& problem) -{ - cuopt_assert(std::is_sorted(subset.begin(), subset.end()), - "subset vector passed to fix_difference is not sorted"); - for (auto var_idx : superset) { - if (std::binary_search(subset.begin(), subset.end(), var_idx)) { continue; } - if (var_idx >= problem.num_cols) { - i_t orig_idx = var_idx - problem.num_cols; - CUOPT_LOG_DEBUG("Fixing variable %d", orig_idx); - cuopt_assert(problem.lower[orig_idx] != 0 || problem.upper[orig_idx] != 0, - "Variable is fixed to other side"); - problem.lower[orig_idx] = 1; - problem.upper[orig_idx] = 1; - } else { - CUOPT_LOG_DEBUG("Fixing variable %d", var_idx); - cuopt_assert(problem.lower[var_idx] != 1 || problem.upper[var_idx] != 1, - "Variable is fixed to other side"); - problem.lower[var_idx] = 0; - problem.upper[var_idx] = 0; - } - } -} - -template -void remove_marked_elements(std::vector& vec, const std::vector& removal_marker) -{ - size_t write_idx = 0; - for (size_t i = 0; i < vec.size(); i++) { - if (!removal_marker[i]) { - if (write_idx != i) { vec[write_idx] = std::move(vec[i]); } - write_idx++; - } - } - vec.resize(write_idx); -} - -template -void remove_dominated_cliques_in_problem_for_single_extended_clique( - const std::vector& curr_clique, - f_t coeff_scale, - i_t remaining_rows_budget, - i_t remaining_nnz_budget, - i_t& inserted_row_nnz, - const std::vector>& sp_sigs, - const std::vector>& cstr_vars, - const std::vector>& knapsack_constraints, - std::vector& original_to_current_row_idx, - dual_simplex::user_problem_t& problem, - dual_simplex::csr_matrix_t& A, - cuopt::timer_t& timer) -{ - inserted_row_nnz = 0; - if (curr_clique.empty() || sp_sigs.empty()) { return; } - std::vector curr_clique_vars(curr_clique.begin(), curr_clique.end()); - std::sort(curr_clique_vars.begin(), curr_clique_vars.end()); - curr_clique_vars.erase(std::unique(curr_clique_vars.begin(), curr_clique_vars.end()), - curr_clique_vars.end()); - long long signature = 0; - for (auto v : curr_clique_vars) { - signature += static_cast(v); - } - constexpr size_t dominance_window = 20000; - auto end_it = - std::upper_bound(sp_sigs.begin(), sp_sigs.end(), signature, compare_signature_value); - size_t end = static_cast(std::distance(sp_sigs.begin(), end_it)); - size_t start = (end > dominance_window) ? (end - dominance_window) : 0; - std::vector rows_to_remove; - bool covering_clique_implied_by_partitioning = false; - for (size_t idx = end; idx > start; idx--) { - if (timer.check_time_limit()) { break; } - const auto& sp = sp_sigs[idx - 1]; - const auto& vars_sp = cstr_vars[sp.knapsack_idx]; - if (vars_sp.size() > curr_clique_vars.size()) { continue; } - cuopt_assert(std::is_sorted(vars_sp.begin(), vars_sp.end()), - "vars_sp vector passed to is_sorted_subset is not sorted"); - if (!is_sorted_subset(vars_sp, curr_clique_vars)) { continue; } - if (knapsack_constraints[sp.knapsack_idx].is_set_partitioning) { - if (vars_sp.size() != curr_clique_vars.size()) { - fix_difference(curr_clique_vars, vars_sp, problem); - covering_clique_implied_by_partitioning = true; - } - continue; - } - i_t original_row_idx = knapsack_constraints[sp.knapsack_idx].cstr_idx; - if (original_row_idx < 0) { continue; } - cuopt_assert(original_row_idx < static_cast(original_to_current_row_idx.size()), - "Invalid original row index in knapsack constraint"); - i_t current_row_idx = original_to_current_row_idx[original_row_idx]; - if (current_row_idx < 0) { continue; } - cuopt_assert(current_row_idx < static_cast(problem.row_sense.size()), - "Invalid current row index in row mapping"); - rows_to_remove.push_back(current_row_idx); - } - if (rows_to_remove.empty()) { return; } - std::sort(rows_to_remove.begin(), rows_to_remove.end()); - rows_to_remove.erase(std::unique(rows_to_remove.begin(), rows_to_remove.end()), - rows_to_remove.end()); - if (!covering_clique_implied_by_partitioning) { - if (remaining_rows_budget <= 0 || - remaining_nnz_budget < static_cast(curr_clique_vars.size())) { - return; - } - insert_clique_into_problem(curr_clique_vars, problem, A, coeff_scale); - inserted_row_nnz = static_cast(curr_clique_vars.size()); - } - std::vector removal_marker(problem.row_sense.size(), 0); - for (auto row_idx : rows_to_remove) { - cuopt_assert(row_idx >= 0 && row_idx < static_cast(removal_marker.size()), - "Invalid dominated row index"); - CUOPT_LOG_DEBUG("Removing dominated row %d", row_idx); - removal_marker[row_idx] = true; - } - dual_simplex::csr_matrix_t A_removed(0, 0, 0); - A.remove_rows(removal_marker, A_removed); - A = std::move(A_removed); - problem.num_rows = A.m; - remove_marked_elements(problem.row_sense, removal_marker); - remove_marked_elements(problem.rhs, removal_marker); - remove_marked_elements(problem.row_names, removal_marker); - cuopt_assert(problem.rhs.size() == problem.row_sense.size(), "rhs and row sense size mismatch"); - cuopt_assert(problem.row_names.size() == problem.rhs.size(), "row names and rhs size mismatch"); - cuopt_assert(problem.num_rows == static_cast(problem.rhs.size()), - "matrix and num rows mismatch after removal"); - if (!problem.range_rows.empty()) { - std::vector old_to_new_indices; - old_to_new_indices.reserve(removal_marker.size()); - i_t new_idx = 0; - for (size_t i = 0; i < removal_marker.size(); ++i) { - if (!removal_marker[i]) { - old_to_new_indices.push_back(new_idx++); - } else { - old_to_new_indices.push_back(-1); - } - } - std::vector new_range_rows; - std::vector new_range_values; - for (size_t i = 0; i < problem.range_rows.size(); ++i) { - i_t old_row = problem.range_rows[i]; - cuopt_assert(old_row >= 0 && old_row < static_cast(removal_marker.size()), - "Invalid row index in range_rows"); - if (!removal_marker[old_row]) { - i_t new_row = old_to_new_indices[old_row]; - cuopt_assert(new_row != -1, "Invalid new row index for ranged row renumbering"); - new_range_rows.push_back(new_row); - new_range_values.push_back(problem.range_value[i]); - } - } - problem.range_rows = std::move(new_range_rows); - problem.range_value = std::move(new_range_values); - } - problem.num_range_rows = static_cast(problem.range_rows.size()); - std::vector removed_prefix(removal_marker.size() + 1, 0); - for (size_t row_idx = 0; row_idx < removal_marker.size(); row_idx++) { - removed_prefix[row_idx + 1] = - removed_prefix[row_idx] + static_cast(removal_marker[row_idx]); - } - for (i_t row_idx = 0; row_idx < static_cast(original_to_current_row_idx.size()); row_idx++) { - i_t current_row_idx = original_to_current_row_idx[row_idx]; - if (current_row_idx < 0) { continue; } - cuopt_assert(current_row_idx < static_cast(removal_marker.size()), - "Row index map is out of bounds"); - if (removal_marker[current_row_idx]) { - original_to_current_row_idx[row_idx] = -1; - } else { - original_to_current_row_idx[row_idx] = current_row_idx - removed_prefix[current_row_idx]; - } - } -} - -// Also known as clique merging. Infer larger clique constraints which allows inclusion of vars from -// other constraints. This only extends the original cliques in the formulation for now. -// TODO: consider a heuristic on how much of the cliques derived from knapsacks to include here +// Extends set-packing cliques. Soft floor: min_work; hard ceiling: max_work +// or `timer`. signal_extend only honored after min_work. template i_t extend_cliques(const std::vector>& knapsack_constraints, - const std::unordered_set& set_packing_constraints, clique_table_t& clique_table, - dual_simplex::user_problem_t& problem, - dual_simplex::csr_matrix_t& A, - bool modify_problem, cuopt::timer_t& timer, double* work_estimate_out, - double max_work_estimate) + double min_work, + double max_work, + std::atomic* signal_extend) { - constexpr i_t min_extension_gain = 2; - constexpr i_t extension_yield_window = 64; - constexpr i_t min_successes_per_window = 1; + constexpr i_t min_extension_gain = 2; double local_work = 0.0; double& work = work_estimate_out ? *work_estimate_out : local_work; - i_t base_rows = A.m; - i_t base_nnz = A.row_start[A.m]; - i_t max_added_rows = std::max(8, base_rows / 50); - i_t max_added_nnz = std::max(8 * clique_table.max_clique_size_for_extension, base_nnz / 50); - - i_t added_rows = 0; - i_t added_nnz = 0; - i_t window_attempts = 0; - i_t window_successes = 0; - - CUOPT_LOG_DEBUG("Clique extension heuristics: min_gain=%d row_budget=%d nnz_budget=%d", - min_extension_gain, - max_added_rows, - max_added_nnz); - std::vector> cstr_vars(knapsack_constraints.size()); - std::vector> sp_sigs; - sp_sigs.reserve(set_packing_constraints.size()); - for (const auto knapsack_idx : set_packing_constraints) { - cuopt_assert(knapsack_idx >= 0 && knapsack_idx < static_cast(knapsack_constraints.size()), - "Invalid set packing constraint index"); - const auto& vars = knapsack_constraints[knapsack_idx].entries; - cstr_vars[knapsack_idx].reserve(vars.size()); - for (const auto& entry : vars) { - cstr_vars[knapsack_idx].push_back(entry.col); - } - std::sort(cstr_vars[knapsack_idx].begin(), cstr_vars[knapsack_idx].end()); - cstr_vars[knapsack_idx].erase( - std::unique(cstr_vars[knapsack_idx].begin(), cstr_vars[knapsack_idx].end()), - cstr_vars[knapsack_idx].end()); - long long signature = 0; - for (auto v : cstr_vars[knapsack_idx]) { - signature += static_cast(v); - } - sp_sigs.push_back({knapsack_idx, static_cast(cstr_vars[knapsack_idx].size()), signature}); - work += cstr_vars[knapsack_idx].size(); - } - if (work > max_work_estimate) { return 0; } - std::sort(sp_sigs.begin(), sp_sigs.end(), compare_clique_sig); - std::vector original_to_current_row_idx(problem.row_sense.size(), -1); - for (i_t row_idx = 0; row_idx < static_cast(original_to_current_row_idx.size()); row_idx++) { - original_to_current_row_idx[row_idx] = row_idx; - } std::vector> extension_worklist; extension_worklist.reserve(knapsack_constraints.size()); for (i_t knapsack_idx = 0; knapsack_idx < static_cast(knapsack_constraints.size()); knapsack_idx++) { if (timer.check_time_limit()) { break; } - if (work > max_work_estimate) { break; } + if (work >= max_work) { break; } const auto& knapsack_constraint = knapsack_constraints[knapsack_idx]; if (!knapsack_constraint.is_set_packing) { continue; } i_t clique_size = static_cast(knapsack_constraint.entries.size()); @@ -864,99 +515,93 @@ i_t extend_cliques(const std::vector>& knapsack_ i_t estimated_gain = std::max(0, smallest_degree - (clique_size - 1)); if (estimated_gain < min_extension_gain) { continue; } extension_worklist.push_back({knapsack_idx, estimated_gain, clique_size}); - work += knapsack_constraint.entries.size(); + work += static_cast(knapsack_constraint.entries.size()); } std::stable_sort( extension_worklist.begin(), extension_worklist.end(), compare_extension_candidate); + if (!extension_worklist.empty()) { + work += static_cast(extension_worklist.size()) * + std::log2(static_cast(extension_worklist.size())); + } CUOPT_LOG_DEBUG("Clique extension candidates after scoring: %zu", extension_worklist.size()); i_t n_extended_cliques = 0; for (const auto& candidate : extension_worklist) { if (timer.check_time_limit()) { break; } - if (work > max_work_estimate) { break; } - if (added_rows >= max_added_rows || added_nnz >= max_added_nnz) { - CUOPT_LOG_DEBUG( - "Stopping clique extension: budget reached (rows=%d nnz=%d)", added_rows, added_nnz); - break; + if (work >= min_work) { + if (work >= max_work) { break; } + if (signal_extend && signal_extend->load(std::memory_order_acquire)) { + CUOPT_LOG_DEBUG("Stopping clique extension: cut-pass signal received (work=%.0f)", work); + break; + } } - window_attempts++; const auto& knapsack_constraint = knapsack_constraints[candidate.knapsack_idx]; std::vector clique; + clique.reserve(knapsack_constraint.entries.size()); for (const auto& entry : knapsack_constraint.entries) { clique.push_back(entry.col); } - i_t inserted_row_nnz = 0; - f_t coeff_scale = knapsack_constraint.entries[0].val; - bool extended_clique = extend_clique(clique, - clique_table, - problem, - A, - coeff_scale, - modify_problem, - min_extension_gain, - max_added_rows - added_rows, - max_added_nnz - added_nnz, - inserted_row_nnz); - work += clique.size() * clique.size(); - if (extended_clique) { - n_extended_cliques++; - i_t replacement_row_nnz = 0; - if (modify_problem) { - remove_dominated_cliques_in_problem_for_single_extended_clique(clique_table.first.back(), - coeff_scale, - max_added_rows - added_rows, - max_added_nnz - added_nnz, - replacement_row_nnz, - sp_sigs, - cstr_vars, - knapsack_constraints, - original_to_current_row_idx, - problem, - A, - timer); - } - if (replacement_row_nnz > 0) { - window_successes++; - added_rows++; - added_nnz += replacement_row_nnz; - } - } - if (window_attempts >= extension_yield_window) { - if (window_successes < min_successes_per_window) { - CUOPT_LOG_DEBUG( - "Stopping clique extension: low yield (%d/%d)", window_successes, window_attempts); - break; - } - window_attempts = 0; - window_successes = 0; - } + if (extend_clique(clique, clique_table, work)) { n_extended_cliques++; } } - if (modify_problem) { - // copy modified matrix back to problem - A.to_compressed_col(problem.A); - } - CUOPT_LOG_DEBUG("Number of extended cliques: %d", n_extended_cliques); + CUOPT_LOG_DEBUG("Number of extended cliques: %d (work=%.0f)", n_extended_cliques, work); return n_extended_cliques; } template void fill_var_clique_maps(clique_table_t& clique_table) { - clique_table.first_var_positions.resize(clique_table.first.size()); + const i_t n_vertices = 2 * clique_table.n_variables; + + // first_var_positions: per-clique hash map (cliques small ⇒ hash beats binary search). + clique_table.first_var_positions.assign(clique_table.first.size(), {}); + + std::vector> first_pairs; + size_t total_first_members = 0; + for (const auto& c : clique_table.first) { + total_first_members += c.size(); + } + first_pairs.reserve(total_first_members); + for (size_t clique_idx = 0; clique_idx < clique_table.first.size(); clique_idx++) { const auto& clique = clique_table.first[clique_idx]; auto& pos_map = clique_table.first_var_positions[clique_idx]; pos_map.reserve(clique.size()); for (size_t idx = 0; idx < clique.size(); idx++) { - i_t var_idx = clique[idx]; - clique_table.var_clique_map_first[var_idx].insert(clique_idx); + const i_t var_idx = clique[idx]; + first_pairs.emplace_back(var_idx, static_cast(clique_idx)); pos_map[var_idx] = static_cast(idx); } } - for (size_t addtl_c = 0; addtl_c < clique_table.addtl_cliques.size(); addtl_c++) { - const auto& addtl_clique = clique_table.addtl_cliques[addtl_c]; - clique_table.var_clique_map_addtl[addtl_clique.vertex_idx].insert(addtl_c); + clique_table.var_clique_first.finalize_from_unsorted_pairs(n_vertices, first_pairs); + + std::vector> addtl_pairs; + for (size_t addtl_c = 0; addtl_c < clique_table.addtl_cliques.size(); ++addtl_c) { + const auto& a = clique_table.addtl_cliques[addtl_c]; + addtl_pairs.emplace_back(a.vertex_idx, static_cast(addtl_c)); + const auto& base = clique_table.first[a.clique_idx]; + for (i_t pos = a.start_pos_on_clique; pos < static_cast(base.size()); ++pos) { + addtl_pairs.emplace_back(base[pos], static_cast(addtl_c)); + } + } + clique_table.var_clique_addtl.finalize_from_unsorted_pairs(n_vertices, addtl_pairs); +} + +template +void clique_table_t::set_small_clique_adj_for_test( + const std::unordered_map>& edges) +{ + std::vector> pairs; + size_t total = 0; + for (const auto& kv : edges) { + total += kv.second.size(); + } + pairs.reserve(total); + for (const auto& kv : edges) { + for (const auto& v : kv.second) { + pairs.emplace_back(kv.first, v); + } } + small_clique_adj.finalize_from_unsorted_pairs(2 * n_variables, pairs); } template @@ -972,12 +617,10 @@ void build_clique_table(const dual_simplex::user_problem_t& problem, cuopt_assert(problem.var_types.size() == static_cast(problem.num_cols), "Problem variable types size mismatch"); std::vector> knapsack_constraints; - std::unordered_set set_packing_constraints; dual_simplex::csr_matrix_t A(problem.num_rows, problem.num_cols, 0); problem.A.to_compressed_row(A); fill_knapsack_constraints(problem, knapsack_constraints, A); - make_coeff_positive_knapsack_constraint( - problem, knapsack_constraints, set_packing_constraints, tolerances); + make_coeff_positive_knapsack_constraint(problem, knapsack_constraints, tolerances); sort_csr_by_constraint_coefficients(knapsack_constraints); clique_table.tolerances = tolerances; for (const auto& knapsack_constraint : knapsack_constraints) { @@ -1035,7 +678,6 @@ void find_initial_cliques(dual_simplex::user_problem_t& problem, typename mip_solver_settings_t::tolerances_t tolerances, std::shared_ptr>* clique_table_out, cuopt::timer_t& timer, - bool modify_problem, std::atomic* signal_extend) { cuopt::timer_t stage_timer(std::numeric_limits::infinity()); @@ -1050,15 +692,13 @@ void find_initial_cliques(dual_simplex::user_problem_t& problem, double t_remove = 0.; #endif std::vector> knapsack_constraints; - std::unordered_set set_packing_constraints; dual_simplex::csr_matrix_t A(problem.num_rows, problem.num_cols, 0); problem.A.to_compressed_row(A); fill_knapsack_constraints(problem, knapsack_constraints, A); #ifdef DEBUG_CLIQUE_TABLE t_fill = stage_timer.elapsed_time(); #endif - make_coeff_positive_knapsack_constraint( - problem, knapsack_constraints, set_packing_constraints, tolerances); + make_coeff_positive_knapsack_constraint(problem, knapsack_constraints, tolerances); #ifdef DEBUG_CLIQUE_TABLE t_coeff = stage_timer.elapsed_time(); #endif @@ -1083,9 +723,9 @@ void find_initial_cliques(dual_simplex::user_problem_t& problem, double time_limit_for_additional_cliques = timer.remaining_time() / 2; cuopt::timer_t additional_cliques_timer(time_limit_for_additional_cliques); double find_work_estimate = 0.0; + // Always build base cliques in full; signal_extend only gates the extension phase. for (const auto& knapsack_constraint : knapsack_constraints) { if (timer.check_time_limit()) { break; } - if (signal_extend && signal_extend->load(std::memory_order_acquire)) { break; } find_cliques_from_constraint(knapsack_constraint, *clique_table_ptr, additional_cliques_timer); find_work_estimate += knapsack_constraint.entries.size(); } @@ -1105,17 +745,15 @@ void find_initial_cliques(dual_simplex::user_problem_t& problem, t_maps = stage_timer.elapsed_time(); #endif if (clique_table_out != nullptr) { *clique_table_out = std::move(clique_table_shared); } - double extend_work = 0.0; - constexpr double max_extend_work = 2e9; - i_t n_extended_cliques = extend_cliques(knapsack_constraints, - set_packing_constraints, + double extend_work = 0.0; + i_t n_extended_cliques = extend_cliques(knapsack_constraints, *clique_table_ptr, - problem, - A, - modify_problem, timer, &extend_work, - max_extend_work); + clique_config.min_extend_work, + clique_config.max_extend_work, + signal_extend); + if (n_extended_cliques > 0) { fill_var_clique_maps(*clique_table_ptr); } #ifdef DEBUG_CLIQUE_TABLE t_extend = stage_timer.elapsed_time(); CUOPT_LOG_DEBUG( @@ -1134,21 +772,21 @@ void find_initial_cliques(dual_simplex::user_problem_t& problem, #endif } -#define INSTANTIATE(F_TYPE) \ - template void find_initial_cliques( \ - dual_simplex::user_problem_t & problem, \ - typename mip_solver_settings_t::tolerances_t tolerances, \ - std::shared_ptr> * clique_table_out, \ - cuopt::timer_t & timer, \ - bool modify_problem, \ - std::atomic* signal_extend); \ - template void build_clique_table( \ - const dual_simplex::user_problem_t& problem, \ - clique_table_t& clique_table, \ - typename mip_solver_settings_t::tolerances_t tolerances, \ - bool remove_small_cliques_flag, \ - bool fill_var_clique_maps_flag, \ - cuopt::timer_t& timer); \ +#define INSTANTIATE(F_TYPE) \ + template void find_initial_cliques( \ + dual_simplex::user_problem_t & problem, \ + typename mip_solver_settings_t::tolerances_t tolerances, \ + std::shared_ptr> * clique_table_out, \ + cuopt::timer_t & timer, \ + std::atomic * signal_extend); \ + template void build_clique_table( \ + const dual_simplex::user_problem_t& problem, \ + clique_table_t& clique_table, \ + typename mip_solver_settings_t::tolerances_t tolerances, \ + bool remove_small_cliques_flag, \ + bool fill_var_clique_maps_flag, \ + cuopt::timer_t& timer); \ + template void fill_var_clique_maps(clique_table_t & clique_table); \ template class clique_table_t; #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cuh b/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cuh index 944241b4f0..0b26d0728e 100644 --- a/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cuh +++ b/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cuh @@ -23,9 +23,11 @@ #include #include +#include #include #include #include +#include #include namespace cuopt::linear_programming::detail { @@ -33,6 +35,10 @@ namespace cuopt::linear_programming::detail { struct clique_config_t { int min_clique_size = 512; int max_clique_size_for_extension = 128; + // extend_cliques work budget; one unit ≈ one hash/scan op in extend_clique. + // Soft floor before honoring cut-gen signal; hard ceiling. + double min_extend_work = 1e7; + double max_extend_work = 2e9; }; template @@ -59,37 +65,134 @@ struct addtl_clique_t { i_t start_pos_on_clique; }; +// CSR per-vertex map: for v in [0, n_vertices), `indices[offsets[v] .. +// offsets[v+1])` is a sorted slice. Build protocol: callers push (src, value) +// pairs and call `finalize_from_unsorted_pairs`. +template +struct csr_var_map_t { + std::vector offsets; // size: n_vertices + 1; offsets[v] is the start in `indices` + std::vector indices; // sorted within each [offsets[v], offsets[v+1]) slice + + void clear_and_resize(i_t n_vertices) + { + offsets.assign(n_vertices + 1, 0); + indices.clear(); + } + i_t n_keys() const { return offsets.empty() ? 0 : static_cast(offsets.size() - 1); } + i_t slice_size(i_t v) const { return offsets[v + 1] - offsets[v]; } + const i_t* slice_begin(i_t v) const { return indices.data() + offsets[v]; } + const i_t* slice_end(i_t v) const { return indices.data() + offsets[v + 1]; } + // O(1) summary used by cut/extension cost-budget heuristics. + double avg_slice_size() const + { + const i_t k = n_keys(); + return k > 0 ? static_cast(indices.size()) / static_cast(k) : 0.0; + } + bool slice_contains(i_t v, i_t value) const + { + const i_t* b = slice_begin(v); + const i_t* e = slice_end(v); + return std::binary_search(b, e, value); + } + + // Build CSR from unsorted (src, value) pairs. Each output slice is sorted + // and deduplicated. Caller must keep p.first in [0, n_vertices). + void finalize_from_unsorted_pairs(i_t n_vertices, std::vector>& pairs) + { + offsets.assign(n_vertices + 1, 0); + for (const auto& p : pairs) { + offsets[p.first + 1]++; + } + for (i_t v = 1; v <= n_vertices; ++v) { + offsets[v] += offsets[v - 1]; + } + indices.assign(static_cast(offsets.back()), i_t{0}); + std::vector head(n_vertices, 0); + for (const auto& p : pairs) { + indices[offsets[p.first] + head[p.first]++] = p.second; + } + for (i_t v = 0; v < n_vertices; ++v) { + auto* b = indices.data() + offsets[v]; + auto* e = indices.data() + offsets[v] + head[v]; + std::sort(b, e); + auto* new_end = std::unique(b, e); + head[v] = static_cast(new_end - b); + } + // Compact away dedupe holes. + std::vector new_offsets(n_vertices + 1, 0); + for (i_t v = 0; v < n_vertices; ++v) { + new_offsets[v + 1] = new_offsets[v] + head[v]; + } + if (new_offsets.back() != offsets.back()) { + std::vector new_indices(static_cast(new_offsets.back())); + for (i_t v = 0; v < n_vertices; ++v) { + std::copy(indices.data() + offsets[v], + indices.data() + offsets[v] + head[v], + new_indices.data() + new_offsets[v]); + } + offsets = std::move(new_offsets); + indices = std::move(new_indices); + } else { + offsets = std::move(new_offsets); + } + } +}; + template struct clique_table_t { clique_table_t(i_t n_vertices, i_t min_clique_size_, i_t max_clique_size_for_extension_) : min_clique_size(min_clique_size_), max_clique_size_for_extension(max_clique_size_for_extension_), - var_clique_map_first(n_vertices), - var_clique_map_addtl(n_vertices), - adj_list_small_cliques(n_vertices), var_degrees(n_vertices, -1), n_variables(n_vertices / 2) + { + var_clique_first.clear_and_resize(n_vertices); + var_clique_addtl.clear_and_resize(n_vertices); + small_clique_adj.clear_and_resize(n_vertices); + } + + // Copy disabled; move provided so tests can return by value. + // Move-assign omitted because of const members. + clique_table_t(const clique_table_t&) = delete; + clique_table_t& operator=(const clique_table_t&) = delete; + + clique_table_t(clique_table_t&& other) noexcept + : first(std::move(other.first)), + addtl_cliques(std::move(other.addtl_cliques)), + var_clique_first(std::move(other.var_clique_first)), + var_clique_addtl(std::move(other.var_clique_addtl)), + first_var_positions(std::move(other.first_var_positions)), + small_clique_adj(std::move(other.small_clique_adj)), + var_degrees(std::move(other.var_degrees)), + n_variables(other.n_variables), + min_clique_size(other.min_clique_size), + max_clique_size_for_extension(other.max_clique_size_for_extension), + tolerances(other.tolerances) { } - std::unordered_set get_adj_set_of_var(i_t var_idx); + clique_table_t& operator=(clique_table_t&&) = delete; + + std::unordered_set get_adj_set_of_var(i_t var_idx) const; i_t get_degree_of_var(i_t var_idx); - bool check_adjacency(i_t var_idx1, i_t var_idx2); + bool check_adjacency(i_t var_idx1, i_t var_idx2) const; + + void set_small_clique_adj_for_test(const std::unordered_map>& edges); // keeps the large cliques in each constraint std::vector> first; // keeps the additional cliques std::vector> addtl_cliques; - // TODO figure out the performance of lookup for the following: unordered_set vs vector - // keeps the indices of original(first) cliques that contain variable x - std::vector> var_clique_map_first; - // keeps the indices of additional cliques that contain variable x - std::vector> var_clique_map_addtl; + // var_idx → indices of `first` cliques that contain var_idx (CSR). + csr_var_map_t var_clique_first; + // var_idx → indices of `addtl_cliques` containing var_idx (as the extension + // vertex or as a base-suffix member). + csr_var_map_t var_clique_addtl; // var_idx -> position mapping for each first clique, enabling O(1) membership/position checks std::vector> first_var_positions; - // adjacency list to keep small cliques, this basically keeps the vars share a small clique - // constraint - std::unordered_map> adj_list_small_cliques; + // var_idx → pairwise edges from cliques demoted by remove_small_cliques. + // Symmetric: edge (u, v) appears in both u's and v's slices. + csr_var_map_t small_clique_adj; // degrees of each vertex std::vector var_degrees; // number of variables in the original problem @@ -104,7 +207,6 @@ void find_initial_cliques(dual_simplex::user_problem_t& problem, typename mip_solver_settings_t::tolerances_t tolerances, std::shared_ptr>* clique_table_out, cuopt::timer_t& timer, - bool modify_problem, std::atomic* signal_extend = nullptr); template @@ -115,6 +217,9 @@ void build_clique_table(const dual_simplex::user_problem_t& problem, bool fill_var_clique_maps, cuopt::timer_t& timer); +template +void fill_var_clique_maps(clique_table_t& clique_table); + } // namespace cuopt::linear_programming::detail // Possible application to rounding procedure, keeping it as reference diff --git a/cpp/src/mip_heuristics/presolve/multi_probe.cu b/cpp/src/mip_heuristics/presolve/multi_probe.cu index f798957e1c..c91774b36e 100644 --- a/cpp/src/mip_heuristics/presolve/multi_probe.cu +++ b/cpp/src/mip_heuristics/presolve/multi_probe.cu @@ -74,6 +74,7 @@ multi_probe_t::multi_probe_t(mip_solver_context_t& context_, : context(context_), upd_0(*context.problem_ptr), upd_1(*context.problem_ptr), + clique_data(context.problem_ptr->handle_ptr->get_stream()), settings(in_settings) { } @@ -85,6 +86,38 @@ void multi_probe_t::resize(problem_t& problem) upd_1.resize(problem); host_lb.resize(problem.n_variables); host_ub.resize(problem.n_variables); + // Clique-group cache invalidation handled by ensure_clique_data fingerprint. +} + +template +void multi_probe_t::ensure_clique_data(problem_t& pb) +{ + // See bound_presolve_t::ensure_clique_data for rationale. + auto ct_snapshot = pb.get_clique_table_snapshot(); + auto* current_ct = ct_snapshot.get(); + const bool cache_hit = clique_data_built // + && last_built_problem == &pb // + && last_built_clique_table == current_ct // + && last_built_n_variables == pb.n_variables // + && last_built_n_constraints == pb.n_constraints // + && last_built_nnz == pb.nnz; + if (cache_hit) return; + + if (!current_ct) { + clique_data.n_groups = 0; + clique_data_built = false; + return; + } + clique_data.build_from_host(pb, context.problem_ptr->reverse_original_ids, *current_ct); + auto stream = pb.handle_ptr->get_stream(); + upd_0.resize_clique_buffers(clique_data.n_groups, stream); + upd_1.resize_clique_buffers(clique_data.n_groups, stream); + last_built_problem = &pb; + last_built_clique_table = current_ct; + last_built_n_variables = pb.n_variables; + last_built_n_constraints = pb.n_constraints; + last_built_nnz = pb.nnz; + clique_data_built = true; } template @@ -141,34 +174,94 @@ bool multi_probe_t::calculate_bounds_update(problem_t& pb, constexpr i_t zero = 0; constexpr auto n_threads = 256; - if (skip_0 && skip_1) { - return false; + if (skip_0 && skip_1) { return false; } + + auto stream = handle_ptr->get_stream(); + + // Per-probe clique-aware update; static group table is shared via `clique_data`. + auto run_clique_update_for_probe = [&](bounds_update_data_t& upd) { + constexpr i_t warp = raft::WarpSize; + compute_clique_corrections_kernel + <<>>(make_span(clique_data.group_member_offsets), + make_span(clique_data.group_member_vars), + make_span(clique_data.group_member_coeffs), + make_span(clique_data.group_constraint_ids), + make_span(upd.changed_constraints), + make_span(upd.lb), + make_span(upd.ub), + make_span(upd.group_max_correction), + make_span(upd.group_min_correction), + make_span(upd.group_max_pos), + make_span(upd.group_second_max_pos), + make_span(upd.group_min_neg), + make_span(upd.group_second_min_neg), + pb.tolerances.integrality_tolerance); + RAFT_CHECK_CUDA(stream); + + constexpr i_t apply_tpb = 128; + const i_t apply_blocks = (pb.n_constraints + apply_tpb - 1) / apply_tpb; + apply_clique_corrections_to_activity_kernel + <<>>(make_span(clique_data.constraint_group_offsets), + make_span(upd.group_max_correction), + make_span(upd.group_min_correction), + make_span(upd.changed_constraints), + make_span(upd.min_activity), + make_span(upd.max_activity), + pb.n_constraints); + RAFT_CHECK_CUDA(stream); + + update_bounds_kernel_cliq + <<>>(pb.view(), upd.view(), clique_data.view()); + RAFT_CHECK_CUDA(stream); + }; + + const bool use_cliques = !clique_data.empty(); + + if (use_cliques) { + // No dual-probe variant of the clique-aware kernel; run per probe. + if (!skip_0) { + upd_0.bounds_changed.set_value_async(zero, stream); + run_clique_update_for_probe(upd_0); + } + if (!skip_1) { + upd_1.bounds_changed.set_value_async(zero, stream); + run_clique_update_for_probe(upd_1); + } + if (!skip_0) { + i_t h_bounds_changed_0 = upd_0.bounds_changed.value(stream); + CUOPT_LOG_TRACE("Bounds changed upd 0 %d", h_bounds_changed_0); + skip_0 = (h_bounds_changed_0 == zero); + } + if (!skip_1) { + i_t h_bounds_changed_1 = upd_1.bounds_changed.value(stream); + CUOPT_LOG_TRACE("Bounds changed upd 1 %d", h_bounds_changed_1); + skip_1 = (h_bounds_changed_1 == zero); + } } else if (skip_0) { - upd_1.bounds_changed.set_value_async(zero, handle_ptr->get_stream()); + upd_1.bounds_changed.set_value_async(zero, stream); update_bounds_kernel - <<get_stream()>>>(pb.view(), upd_1.view()); - RAFT_CHECK_CUDA(handle_ptr->get_stream()); - i_t h_bounds_changed_1 = upd_1.bounds_changed.value(handle_ptr->get_stream()); + <<>>(pb.view(), upd_1.view()); + RAFT_CHECK_CUDA(stream); + i_t h_bounds_changed_1 = upd_1.bounds_changed.value(stream); CUOPT_LOG_TRACE("Bounds changed upd 1 %d", h_bounds_changed_1); skip_1 = (h_bounds_changed_1 == zero); } else if (skip_1) { - upd_0.bounds_changed.set_value_async(zero, handle_ptr->get_stream()); + upd_0.bounds_changed.set_value_async(zero, stream); update_bounds_kernel - <<get_stream()>>>(pb.view(), upd_0.view()); - RAFT_CHECK_CUDA(handle_ptr->get_stream()); - i_t h_bounds_changed_0 = upd_0.bounds_changed.value(handle_ptr->get_stream()); + <<>>(pb.view(), upd_0.view()); + RAFT_CHECK_CUDA(stream); + i_t h_bounds_changed_0 = upd_0.bounds_changed.value(stream); CUOPT_LOG_TRACE("Bounds changed upd 0 %d", h_bounds_changed_0); skip_0 = (h_bounds_changed_0 == zero); } else { - upd_0.bounds_changed.set_value_async(zero, handle_ptr->get_stream()); - upd_1.bounds_changed.set_value_async(zero, handle_ptr->get_stream()); + upd_0.bounds_changed.set_value_async(zero, stream); + upd_1.bounds_changed.set_value_async(zero, stream); update_bounds_kernel - <<get_stream()>>>( - pb.view(), upd_0.view(), upd_1.view()); - RAFT_CHECK_CUDA(handle_ptr->get_stream()); - i_t h_bounds_changed_0 = upd_0.bounds_changed.value(handle_ptr->get_stream()); + <<>>(pb.view(), upd_0.view(), upd_1.view()); + RAFT_CHECK_CUDA(stream); + i_t h_bounds_changed_0 = upd_0.bounds_changed.value(stream); CUOPT_LOG_TRACE("Bounds changed upd 0 %d", h_bounds_changed_0); - i_t h_bounds_changed_1 = upd_1.bounds_changed.value(handle_ptr->get_stream()); + i_t h_bounds_changed_1 = upd_1.bounds_changed.value(stream); CUOPT_LOG_TRACE("Bounds changed upd 1 %d", h_bounds_changed_1); skip_0 = (h_bounds_changed_0 == zero); @@ -280,6 +373,7 @@ termination_criterion_t multi_probe_t::bound_update_loop(problem_t #include "bounds_update_data.cuh" +#include "clique_activity_corrections.cuh" #include "utils.cuh" namespace cuopt::linear_programming::detail { @@ -67,9 +68,24 @@ class multi_probe_t { void update_host_bounds(const raft::handle_t* handle_ptr, const raft::device_span::type> variable_bounds); void update_device_bounds(const raft::handle_t* handle_ptr); + + // Lazily build (or reuse) the clique group table; idempotent. The static + // table is shared by both probes; dynamic buffers live on each upd_*. + void ensure_clique_data(problem_t& pb); + mip_solver_context_t& context; bounds_update_data_t upd_0; bounds_update_data_t upd_1; + + clique_group_table_t clique_data; + bool clique_data_built{false}; + // See bound_presolve_t for fingerprint rationale. + const problem_t* last_built_problem = nullptr; + const clique_table_t* last_built_clique_table = nullptr; + i_t last_built_n_variables = -1; + i_t last_built_n_constraints = -1; + i_t last_built_nnz = -1; + std::vector host_lb; std::vector host_ub; bool skip_0; diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index 10d80586b4..5cbc2a29e3 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -203,7 +204,7 @@ problem_t::problem_t(const problem_t& problem_) objective_is_integral(problem_.objective_is_integral), lp_state(problem_.lp_state), fixing_helpers(problem_.fixing_helpers, handle_ptr), - clique_table(problem_.clique_table), + clique_table(problem_.get_clique_table_snapshot()), vars_with_objective_coeffs(problem_.vars_with_objective_coeffs), expensive_to_fix_vars(problem_.expensive_to_fix_vars), related_vars_time_limit(problem_.related_vars_time_limit), @@ -261,7 +262,7 @@ problem_t::problem_t(const problem_t& problem_, objective_is_integral(problem_.objective_is_integral), lp_state(problem_.lp_state, handle_ptr), fixing_helpers(problem_.fixing_helpers, handle_ptr), - clique_table(problem_.clique_table), + clique_table(problem_.get_clique_table_snapshot()), vars_with_objective_coeffs(problem_.vars_with_objective_coeffs), expensive_to_fix_vars(problem_.expensive_to_fix_vars), related_vars_time_limit(problem_.related_vars_time_limit), @@ -287,7 +288,7 @@ problem_t::problem_t(const problem_t& problem_, bool no_deep maximize(problem_.maximize), empty(problem_.empty), is_binary_pb(problem_.is_binary_pb), - clique_table(problem_.clique_table), + clique_table(problem_.get_clique_table_snapshot()), // Copy constructor used by PDLP and MIP // PDLP uses the version with no_deep_copy = false which deep copy some fields but doesn't // allocate others that are not needed in PDLP @@ -478,6 +479,26 @@ void csr_to_csc_transpose(const i_t* csr_offsets, raft::copy(csc_values, val_sorted.data(), nnz, stream); } +template +std::shared_ptr> problem_t::get_clique_table_snapshot() const +{ + // Acquire-load so any shared_ptr state written by the publisher before its + // corresponding atomic_store is visible to us. Using std::atomic_load on a + // shared_ptr is deprecated in C++20 but still provided; it is the standard + // one-shot publication primitive for pre-C++20 shared_ptr. + return std::atomic_load_explicit(&clique_table, std::memory_order_acquire); +} + +template +void problem_t::publish_clique_table(std::shared_ptr> ct) +{ + // Single-writer, many-reader publication. Invoked once from B&B's async + // clique-build task. Release-store pairs with the acquire-loads in + // get_clique_table_snapshot and in copy constructors that snapshot the + // shared_ptr during object duplication. + std::atomic_store_explicit(&clique_table, std::move(ct), std::memory_order_release); +} + template void problem_t::compute_transpose_of_problem() { diff --git a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh index a801cc4067..a0e3bdef0d 100644 --- a/cpp/src/mip_heuristics/problem/problem.cuh +++ b/cpp/src/mip_heuristics/problem/problem.cuh @@ -131,8 +131,23 @@ class problem_t { bool is_integer(f_t val) const; bool integer_equal(f_t val1, f_t val2) const; + // Clique table shared with B&B cut generation. Heuristics read it through + // the thread-safe accessors below because B&B's async clique build may + // publish it from a worker thread while heuristics are running. + // + // - Publication: B&B's async clique-build task calls publish_clique_table + // exactly once when the build completes. This is the only writer. + // - Observation: all heuristic readers go through get_clique_table_snapshot + // (atomic_load on the shared_ptr). Copies of problem_t also snapshot via + // this accessor so concurrent publication does not tear the source. + // + // Direct access to `clique_table` is reserved for code paths that know no + // concurrent writer can exist (e.g. single-threaded tests, destructor). std::shared_ptr> clique_table; + std::shared_ptr> get_clique_table_snapshot() const; + void publish_clique_table(std::shared_ptr> ct); + void get_host_user_problem( cuopt::linear_programming::dual_simplex::user_problem_t& user_problem) const; void set_constraints_from_host_user_problem( diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index ce6b602fba..fba1f1ba9a 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -386,15 +387,32 @@ solution_t mip_solver_t::run_solver() std::placeholders::_2); } - // Create the branch and bound object + // Create the branch and bound object. + // + // Clique-table lifecycle: presolve no longer builds an initial clique + // table, so context.problem_ptr->clique_table is expected to be null + // here. B&B's async build (kicked off inside branch_and_bound_t::solve) + // produces the table and, via the publish callback installed below, + // atomically stores it into context.problem_ptr->clique_table so + // heuristic ensure_clique_data() can observe it on its next iteration. branch_and_bound = std::make_unique>( branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start(), probing_implied_bound, - context.problem_ptr->clique_table); + context.problem_ptr->get_clique_table_snapshot()); context.branch_and_bound_ptr = branch_and_bound.get(); + // Publish the async-built clique_table onto context.problem_ptr so + // heuristics pick it up via the atomic snapshot accessor. + { + auto* pb = context.problem_ptr; + branch_and_bound->set_clique_publish_callback( + [pb](std::shared_ptr> ct) { + pb->publish_clique_table(std::move(ct)); + }); + } + // Convert the best external upper bound from user-space to B&B's internal objective space. // context.problem_ptr is the post-trivial-presolve problem, whose get_solver_obj_from_user_obj // produces values in the same space as B&B node lower bounds. diff --git a/cpp/tests/mip/CMakeLists.txt b/cpp/tests/mip/CMakeLists.txt index 47eb5a5a0f..4cda240320 100644 --- a/cpp/tests/mip/CMakeLists.txt +++ b/cpp/tests/mip/CMakeLists.txt @@ -32,6 +32,7 @@ ConfigureTest(CUTS_TEST ConfigureTest(UNIT_TEST ${CMAKE_CURRENT_SOURCE_DIR}/unit_test.cu ${CMAKE_CURRENT_SOURCE_DIR}/integer_with_real_bounds.cu + ${CMAKE_CURRENT_SOURCE_DIR}/clique_activity_test.cu LABELS numopt) ConfigureTest(EMPTY_FIXED_PROBLEMS_TEST ${CMAKE_CURRENT_SOURCE_DIR}/empty_fixed_problems_test.cu diff --git a/cpp/tests/mip/clique_activity_test.cu b/cpp/tests/mip/clique_activity_test.cu new file mode 100644 index 0000000000..0c58dbe49d --- /dev/null +++ b/cpp/tests/mip/clique_activity_test.cu @@ -0,0 +1,771 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include "../linear_programming/utilities/pdlp_test_utilities.cuh" +#include "mip_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::test { + +namespace { + +// Helpers + +// Build a minimal problem_t. preprocess_problem() populates reverse CSR and +// is_binary_variable, which clique_group_table_t::build_from_host reads. +std::shared_ptr> make_problem(const raft::handle_t& handle, + const std::vector& offsets, + const std::vector& indices, + const std::vector& values, + const std::vector& cnst_lb, + const std::vector& cnst_ub, + const std::vector& var_lb, + const std::vector& var_ub, + const std::vector& var_types) +{ + mps_parser::mps_data_model_t model; + model.set_csr_constraint_matrix(values.data(), + static_cast(values.size()), + indices.data(), + static_cast(indices.size()), + offsets.data(), + static_cast(offsets.size())); + model.set_constraint_lower_bounds(cnst_lb.data(), cnst_lb.size()); + model.set_constraint_upper_bounds(cnst_ub.data(), cnst_ub.size()); + model.set_variable_lower_bounds(var_lb.data(), var_lb.size()); + model.set_variable_upper_bounds(var_ub.data(), var_ub.size()); + std::vector obj(var_lb.size(), 0.0); + model.set_objective_coefficients(obj.data(), obj.size()); + model.set_variable_types(var_types); + model.set_maximize(false); + + auto op = mps_data_model_to_optimization_problem(&handle, model); + auto pb = std::make_shared>(op); + pb->preprocess_problem(); + return pb; +} + +// Build a clique_table with the given large cliques and small adjacency. +// min_clique_size=1 disables internal filtering for tests. +std::shared_ptr> make_clique_table( + int n_vars, + const std::vector>& first, + const std::vector>& addtl = {}, + const std::unordered_map>& small_adj = {}) +{ + auto ct = std::make_shared>( + /*n_vertices=*/2 * n_vars, + /*min_clique_size=*/1, + /*max_clique_size_for_extension=*/1); + ct->first = first; + ct->addtl_cliques = addtl; + // Test-only helper: production uses remove_small_cliques to populate the CSR. + ct->set_small_clique_adj_for_test(small_adj); + // Materialize var_clique_first / var_clique_addtl / first_var_positions + // from `first` and `addtl_cliques`. This mirrors what + // build_clique_table()/find_initial_cliques() do at the end of build. + detail::fill_var_clique_maps(*ct); + return ct; +} + +// Tests for clique_group_table_t::build_from_host + +TEST(clique_activity, build_from_host_no_cliques) +{ + // x0 + x1 + x2 + y >= 2, no clique table → no groups. + const raft::handle_t handle{}; + auto pb = make_problem(handle, + {0, 4}, + {0, 1, 2, 3}, + {1.0, 1.0, 1.0, 1.0}, + {2.0}, + {std::numeric_limits::infinity()}, + {0.0, 0.0, 0.0, 0.0}, + {1.0, 1.0, 1.0, 1.0}, + {'I', 'I', 'I', 'I'}); + auto ct = make_clique_table(/*n_vars=*/4, /*first=*/{}); + + detail::clique_group_table_t data(handle.get_stream()); + data.build_from_host(*pb, /*primary_reverse_original_ids=*/{}, *ct); + EXPECT_TRUE(data.empty()); + EXPECT_EQ(data.n_groups, 0); +} + +TEST(clique_activity, build_from_host_basic_large_clique) +{ + // Clique {0,1,2} on x0 + x1 + x2 + y >= 2 → one group on constraint 0. + const raft::handle_t handle{}; + auto pb = make_problem(handle, + {0, 4}, + {0, 1, 2, 3}, + {1.0, 1.0, 1.0, 1.0}, + {2.0}, + {std::numeric_limits::infinity()}, + {0.0, 0.0, 0.0, 0.0}, + {1.0, 1.0, 1.0, 1.0}, + {'I', 'I', 'I', 'I'}); + auto ct = make_clique_table(/*n_vars=*/4, /*first=*/{{0, 1, 2}}); + + detail::clique_group_table_t data(handle.get_stream()); + data.build_from_host(*pb, /*primary_reverse_original_ids=*/{}, *ct); + ASSERT_FALSE(data.empty()); + ASSERT_EQ(data.n_groups, 1); + + auto stream = handle.get_stream(); + auto h_cnst_ids = host_copy(data.group_constraint_ids, stream); + auto h_mem_off = host_copy(data.group_member_offsets, stream); + auto h_mem_vars = host_copy(data.group_member_vars, stream); + auto h_mem_coeffs = host_copy(data.group_member_coeffs, stream); + auto h_cg_off = host_copy(data.constraint_group_offsets, stream); + auto h_rev_gid = host_copy(data.reverse_group_id, stream); + + ASSERT_EQ(h_cnst_ids.size(), 1u); + EXPECT_EQ(h_cnst_ids[0], 0); + ASSERT_EQ(h_mem_off.size(), 2u); + EXPECT_EQ(h_mem_off[0], 0); + EXPECT_EQ(h_mem_off[1], 3); + ASSERT_EQ(h_mem_vars.size(), 3u); + // member set must be {0, 1, 2} + std::vector sorted_vars = h_mem_vars; + std::sort(sorted_vars.begin(), sorted_vars.end()); + EXPECT_EQ(sorted_vars, (std::vector{0, 1, 2})); + for (double c : h_mem_coeffs) + EXPECT_DOUBLE_EQ(c, 1.0); + + // constraint_group_offsets: [0, 1] (1 constraint, 1 group) + ASSERT_EQ(h_cg_off.size(), 2u); + EXPECT_EQ(h_cg_off[0], 0); + EXPECT_EQ(h_cg_off[1], 1); + + // reverse_group_id: vars 0,1,2 in group 0; y (var 3) is -1. + ASSERT_EQ(h_rev_gid.size(), 4u); + int count_in_group = 0; + int count_out = 0; + for (int g : h_rev_gid) { + if (g == 0) + count_in_group++; + else if (g == -1) + count_out++; + } + EXPECT_EQ(count_in_group, 3); + EXPECT_EQ(count_out, 1); +} + +TEST(clique_activity, build_from_host_accepts_complement_literal_clique) +{ + // Clique {x0, x1, ~x2}: effective coeffs {+1, +1, -1}. + const raft::handle_t handle{}; + const int n_vars = 4; + auto pb = make_problem(handle, + {0, 4}, + {0, 1, 2, 3}, + {1.0, 1.0, 1.0, 1.0}, + {2.0}, + {std::numeric_limits::infinity()}, + {0.0, 0.0, 0.0, 0.0}, + {1.0, 1.0, 1.0, 1.0}, + {'I', 'I', 'I', 'I'}); + auto ct = + make_clique_table(n_vars, /*first=*/{{0, 1, n_vars + 2}}); // last entry is complement of x2 + + detail::clique_group_table_t data(handle.get_stream()); + data.build_from_host(*pb, /*primary_reverse_original_ids=*/{}, *ct); + ASSERT_EQ(data.n_groups, 1); + + auto stream = handle.get_stream(); + auto h_mem_vars = host_copy(data.group_member_vars, stream); + auto h_mem_coeffs = host_copy(data.group_member_coeffs, stream); + ASSERT_EQ(h_mem_vars.size(), 3u); + std::unordered_map got; + for (std::size_t i = 0; i < h_mem_vars.size(); ++i) { + got[h_mem_vars[i]] = h_mem_coeffs[i]; + } + ASSERT_EQ(got.size(), 3u); + EXPECT_DOUBLE_EQ(got[0], 1.0); + EXPECT_DOUBLE_EQ(got[1], 1.0); + EXPECT_DOUBLE_EQ(got[2], -1.0); +} + +TEST(clique_activity, build_from_host_small_adj_list_fallback) +{ + // No large cliques; triangle of small edges should be extracted as one group. + const raft::handle_t handle{}; + auto pb = make_problem(handle, + {0, 3}, + {0, 1, 2}, + {1.0, 1.0, 1.0}, + {-std::numeric_limits::infinity()}, + {1.0}, + {0.0, 0.0, 0.0}, + {1.0, 1.0, 1.0}, + {'I', 'I', 'I'}); + + std::unordered_map> adj; + adj[0] = {1, 2}; + adj[1] = {0, 2}; + adj[2] = {0, 1}; + auto ct = make_clique_table(/*n_vars=*/3, /*first=*/{}, /*addtl=*/{}, adj); + + detail::clique_group_table_t data(handle.get_stream()); + data.build_from_host(*pb, /*primary_reverse_original_ids=*/{}, *ct); + ASSERT_EQ(data.n_groups, 1); + auto stream = handle.get_stream(); + auto h_mem = host_copy(data.group_member_vars, stream); + std::sort(h_mem.begin(), h_mem.end()); + EXPECT_EQ(h_mem, (std::vector{0, 1, 2})); +} + +// Tests for compute_clique_corrections_kernel + +TEST(clique_activity, compute_corrections_kernel_values) +{ + // Coeffs {3,5,2}, lb=0, ub=1 → max_correction = 10-5 = 5, min_correction = 0. + const raft::handle_t handle{}; + auto stream = handle.get_stream(); + + std::vector h_offsets = {0, 3}; + std::vector h_vars = {0, 1, 2}; + std::vector h_coeffs = {3.0, 5.0, 2.0}; + std::vector h_gcnst = {0}; + std::vector h_chgcnst = {1}; + std::vector h_lb = {0.0, 0.0, 0.0}; + std::vector h_ub = {1.0, 1.0, 1.0}; + + auto d_offsets = device_copy(h_offsets, stream); + auto d_vars = device_copy(h_vars, stream); + auto d_coeffs = device_copy(h_coeffs, stream); + auto d_gcnst = device_copy(h_gcnst, stream); + auto d_chgcnst = device_copy(h_chgcnst, stream); + auto d_lb = device_copy(h_lb, stream); + auto d_ub = device_copy(h_ub, stream); + + rmm::device_uvector d_max_corr(1, stream); + rmm::device_uvector d_min_corr(1, stream); + rmm::device_uvector d_max1(1, stream); + rmm::device_uvector d_max2(1, stream); + rmm::device_uvector d_min1(1, stream); + rmm::device_uvector d_min2(1, stream); + + constexpr int warp = raft::WarpSize; + detail::compute_clique_corrections_kernel + <<<1, warp, 0, stream>>>(make_span(d_offsets), + make_span(d_vars), + make_span(d_coeffs), + make_span(d_gcnst), + make_span(d_chgcnst), + make_span(d_lb), + make_span(d_ub), + make_span(d_max_corr), + make_span(d_min_corr), + make_span(d_max1), + make_span(d_max2), + make_span(d_min1), + make_span(d_min2), + /*int_tol=*/1e-6); + RAFT_CHECK_CUDA(stream); + + auto h_max_corr = host_copy(d_max_corr, stream); + auto h_min_corr = host_copy(d_min_corr, stream); + auto h_max1_h = host_copy(d_max1, stream); + auto h_max2_h = host_copy(d_max2, stream); + auto h_min1_h = host_copy(d_min1, stream); + auto h_min2_h = host_copy(d_min2, stream); + + EXPECT_DOUBLE_EQ(h_max_corr[0], 5.0); + EXPECT_DOUBLE_EQ(h_min_corr[0], 0.0); + EXPECT_DOUBLE_EQ(h_max1_h[0], 5.0); + EXPECT_DOUBLE_EQ(h_max2_h[0], 3.0); + EXPECT_DOUBLE_EQ(h_min1_h[0], 0.0); + EXPECT_DOUBLE_EQ(h_min2_h[0], 0.0); +} + +TEST(clique_activity, compute_corrections_kernel_skips_fixed_members) +{ + // x1 (coeff=5) fixed; only x0, x2 active → max_correction = 5-3 = 2. + const raft::handle_t handle{}; + auto stream = handle.get_stream(); + + std::vector h_offsets = {0, 3}; + std::vector h_vars = {0, 1, 2}; + std::vector h_coeffs = {3.0, 5.0, 2.0}; + std::vector h_gcnst = {0}; + std::vector h_chgcnst = {1}; + std::vector h_lb = {0.0, 1.0, 0.0}; + std::vector h_ub = {1.0, 1.0, 1.0}; + + auto d_offsets = device_copy(h_offsets, stream); + auto d_vars = device_copy(h_vars, stream); + auto d_coeffs = device_copy(h_coeffs, stream); + auto d_gcnst = device_copy(h_gcnst, stream); + auto d_chgcnst = device_copy(h_chgcnst, stream); + auto d_lb = device_copy(h_lb, stream); + auto d_ub = device_copy(h_ub, stream); + + rmm::device_uvector d_max_corr(1, stream); + rmm::device_uvector d_min_corr(1, stream); + rmm::device_uvector d_max1(1, stream); + rmm::device_uvector d_max2(1, stream); + rmm::device_uvector d_min1(1, stream); + rmm::device_uvector d_min2(1, stream); + + constexpr int warp = raft::WarpSize; + detail::compute_clique_corrections_kernel + <<<1, warp, 0, stream>>>(make_span(d_offsets), + make_span(d_vars), + make_span(d_coeffs), + make_span(d_gcnst), + make_span(d_chgcnst), + make_span(d_lb), + make_span(d_ub), + make_span(d_max_corr), + make_span(d_min_corr), + make_span(d_max1), + make_span(d_max2), + make_span(d_min1), + make_span(d_min2), + 1e-6); + RAFT_CHECK_CUDA(stream); + + auto h_max_corr = host_copy(d_max_corr, stream); + auto h_max1_h = host_copy(d_max1, stream); + auto h_max2_h = host_copy(d_max2, stream); + + EXPECT_DOUBLE_EQ(h_max_corr[0], 2.0); + EXPECT_DOUBLE_EQ(h_max1_h[0], 3.0); + EXPECT_DOUBLE_EQ(h_max2_h[0], 2.0); +} + +// End-to-end bound propagation: clique tightens what stock can't. + +TEST(clique_activity, bound_propagation_tightens_with_clique) +{ + // x0+x1+x2+y >= 2 with clique {x0,x1,x2}: + // stock leaves y in [0,1]; clique-aware corrects max_a to 2 → y.lb = 1. + const raft::handle_t handle{}; + auto pb = make_problem(handle, + {0, 4}, + {0, 1, 2, 3}, + {1.0, 1.0, 1.0, 1.0}, + {2.0}, + {std::numeric_limits::infinity()}, + {0.0, 0.0, 0.0, 0.0}, + {1.0, 1.0, 1.0, 1.0}, + {'I', 'I', 'I', 'I'}); + + mip_solver_settings_t settings{}; + detail::mip_solver_context_t context(pb->handle_ptr, pb.get(), settings); + + detail::bound_presolve_t bp_no_clique(context); + bp_no_clique.solve(*pb); + auto stream = handle.get_stream(); + auto h_lb_base = host_copy(bp_no_clique.upd.lb, stream); + auto h_ub_base = host_copy(bp_no_clique.upd.ub, stream); + + pb->clique_table = make_clique_table(/*n_vars=*/4, /*first=*/{{0, 1, 2}}); + + detail::bound_presolve_t bp_with_clique(context); + bp_with_clique.solve(*pb); + auto h_lb_cliq = host_copy(bp_with_clique.upd.lb, stream); + auto h_ub_cliq = host_copy(bp_with_clique.upd.ub, stream); + + EXPECT_DOUBLE_EQ(h_lb_base[3], 0.0); + EXPECT_DOUBLE_EQ(h_lb_cliq[3], 1.0); + + // Clique members stay in [0, 1]: any two=1 remains feasible per-variable. + for (int i = 0; i < 3; ++i) { + EXPECT_DOUBLE_EQ(h_lb_cliq[i], 0.0); + EXPECT_DOUBLE_EQ(h_ub_cliq[i], 1.0); + } +} + +TEST(clique_activity, bound_propagation_tightens_with_complement_literal_clique) +{ + // x0+x1-x2+y >= 1 with clique {x0,x1,~x2}: clique-aware lifts y.lb 0→1. + const raft::handle_t handle{}; + const int n_vars = 4; + auto pb = make_problem(handle, + {0, 4}, + {0, 1, 2, 3}, + {1.0, 1.0, -1.0, 1.0}, + {1.0}, + {std::numeric_limits::infinity()}, + {0.0, 0.0, 0.0, 0.0}, + {1.0, 1.0, 1.0, 1.0}, + {'I', 'I', 'I', 'I'}); + + mip_solver_settings_t settings{}; + detail::mip_solver_context_t context(pb->handle_ptr, pb.get(), settings); + + detail::bound_presolve_t bp_no_clique(context); + bp_no_clique.solve(*pb); + auto stream = handle.get_stream(); + auto h_lb_base = host_copy(bp_no_clique.upd.lb, stream); + EXPECT_DOUBLE_EQ(h_lb_base[3], 0.0); + + pb->clique_table = make_clique_table(n_vars, /*first=*/{{0, 1, n_vars + 2}}); + detail::bound_presolve_t bp_with_clique(context); + bp_with_clique.solve(*pb); + auto h_lb_cliq = host_copy(bp_with_clique.upd.lb, stream); + EXPECT_DOUBLE_EQ(h_lb_cliq[3], 1.0); +} + +TEST(clique_activity, bound_propagation_matches_when_clique_is_noop) +{ + // x0+x1 <= 1: clique correction has no var to propagate onto → identical bounds. + const raft::handle_t handle{}; + auto pb = make_problem(handle, + {0, 2}, + {0, 1}, + {1.0, 1.0}, + {-std::numeric_limits::infinity()}, + {1.0}, + {0.0, 0.0}, + {1.0, 1.0}, + {'I', 'I'}); + + mip_solver_settings_t settings{}; + detail::mip_solver_context_t context(pb->handle_ptr, pb.get(), settings); + + detail::bound_presolve_t bp_no(context); + bp_no.solve(*pb); + auto stream = handle.get_stream(); + auto h_lb_no = host_copy(bp_no.upd.lb, stream); + auto h_ub_no = host_copy(bp_no.upd.ub, stream); + + pb->clique_table = make_clique_table(/*n_vars=*/2, /*first=*/{{0, 1}}); + detail::bound_presolve_t bp_yes(context); + bp_yes.solve(*pb); + auto h_lb_yes = host_copy(bp_yes.upd.lb, stream); + auto h_ub_yes = host_copy(bp_yes.upd.ub, stream); + + ASSERT_EQ(h_lb_no.size(), h_lb_yes.size()); + for (size_t i = 0; i < h_lb_no.size(); ++i) { + EXPECT_DOUBLE_EQ(h_lb_no[i], h_lb_yes[i]) << "lb mismatch at var " << i; + EXPECT_DOUBLE_EQ(h_ub_no[i], h_ub_yes[i]) << "ub mismatch at var " << i; + } +} + +// Clique-aware vs stock monotonicity invariant +// (see CLIQUE_PIPELINE_AUDIT.md for the proof): +// lb_stock <= lb_clique; ub_stock >= ub_clique +// min_activity_stock <= min_activity_clique +// max_activity_stock >= max_activity_clique + +namespace { + +struct presolve_result_t { + std::vector lb; + std::vector ub; + std::vector min_a; + std::vector max_a; +}; + +// Run bound_presolve_t::solve and snapshot converged buffers. `ct` may be null +// for the stock path; otherwise it's attached to pb before solving. +presolve_result_t run_presolve(detail::problem_t& pb, + std::shared_ptr> ct) +{ + pb.clique_table = std::move(ct); + mip_solver_settings_t settings{}; + detail::mip_solver_context_t context(pb.handle_ptr, &pb, settings); + detail::bound_presolve_t bp(context); + bp.solve(pb); + auto stream = pb.handle_ptr->get_stream(); + return {host_copy(bp.upd.lb, stream), + host_copy(bp.upd.ub, stream), + host_copy(bp.upd.min_activity, stream), + host_copy(bp.upd.max_activity, stream)}; +} + +// Assert: clique tightens or matches on every var and constraint. +void expect_clique_tighter_or_equal(const presolve_result_t& stock, + const presolve_result_t& clique, + double tol = 1e-9) +{ + ASSERT_EQ(stock.lb.size(), clique.lb.size()); + ASSERT_EQ(stock.ub.size(), clique.ub.size()); + ASSERT_EQ(stock.min_a.size(), clique.min_a.size()); + ASSERT_EQ(stock.max_a.size(), clique.max_a.size()); + + for (std::size_t v = 0; v < stock.lb.size(); ++v) { + EXPECT_LE(stock.lb[v], clique.lb[v] + tol) + << "lb LOOSER under clique at var " << v << " (stock=" << stock.lb[v] + << ", clique=" << clique.lb[v] << ")"; + EXPECT_GE(stock.ub[v], clique.ub[v] - tol) + << "ub LOOSER under clique at var " << v << " (stock=" << stock.ub[v] + << ", clique=" << clique.ub[v] << ")"; + } + for (std::size_t c = 0; c < stock.min_a.size(); ++c) { + // Skip free constraints (inf stored activity). + const double inf = std::numeric_limits::infinity(); + if (std::abs(stock.min_a[c]) == inf || std::abs(stock.max_a[c]) == inf) continue; + EXPECT_LE(stock.min_a[c], clique.min_a[c] + tol) + << "min_activity LOOSER under clique at cnst " << c << " (stock=" << stock.min_a[c] + << ", clique=" << clique.min_a[c] << ")"; + EXPECT_GE(stock.max_a[c], clique.max_a[c] - tol) + << "max_activity LOOSER under clique at cnst " << c << " (stock=" << stock.max_a[c] + << ", clique=" << clique.max_a[c] << ")"; + } +} + +// Assert: at least one var or constraint tightens strictly. Guards against +// degenerate tests where the clique fails to consume. +void expect_some_strict_tightening(const presolve_result_t& stock, + const presolve_result_t& clique, + double tol = 1e-9) +{ + bool found = false; + for (std::size_t v = 0; v < stock.lb.size(); ++v) { + if (clique.lb[v] > stock.lb[v] + tol) { + found = true; + break; + } + if (clique.ub[v] < stock.ub[v] - tol) { + found = true; + break; + } + } + if (!found) { + for (std::size_t c = 0; c < stock.min_a.size(); ++c) { + if (clique.min_a[c] > stock.min_a[c] + tol) { + found = true; + break; + } + if (clique.max_a[c] < stock.max_a[c] - tol) { + found = true; + break; + } + } + } + EXPECT_TRUE(found) << "Expected at least one strict tightening; clique not being consumed."; +} + +} // namespace + +TEST(clique_activity, monotonicity_cascading_tightening) +{ + // c0: x0+x1+x2+y0 >= 2 (clique {x0,x1,x2}) + // c1: y0+z <= 5 + // Clique lifts y0.lb 0→1, which cascades to tighten z.ub 5→4. + const raft::handle_t handle{}; + const int n_vars = 5; // x0, x1, x2, y0, z + const double inf = std::numeric_limits::infinity(); + auto pb = make_problem(handle, + {0, 4, 6}, + {0, 1, 2, 3, /*c1*/ 3, 4}, + {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, + {2.0, -inf}, + {inf, 5.0}, + {0.0, 0.0, 0.0, 0.0, 0.0}, + {1.0, 1.0, 1.0, 1.0, 10.0}, + {'I', 'I', 'I', 'I', 'C'}); + + auto stock_pb = run_presolve(*pb, /*ct=*/nullptr); + auto clique_pb = run_presolve(*pb, make_clique_table(n_vars, /*first=*/{{0, 1, 2}})); + + expect_clique_tighter_or_equal(stock_pb, clique_pb); + + EXPECT_DOUBLE_EQ(stock_pb.lb[3], 0.0); + EXPECT_DOUBLE_EQ(stock_pb.ub[4], 5.0); + EXPECT_DOUBLE_EQ(clique_pb.lb[3], 1.0); + EXPECT_DOUBLE_EQ(clique_pb.ub[4], 4.0); + + expect_some_strict_tightening(stock_pb, clique_pb); +} + +TEST(clique_activity, monotonicity_independent_cliques_multi_constraint) +{ + // Two independent cliques + a coupling constraint. + // Clique lifts y0,y1 to 1; min_a(c2) becomes 2 → z.ub 3→1. + const raft::handle_t handle{}; + const int n_vars = 9; // x0..x5, y0, y1, z + const double inf = std::numeric_limits::infinity(); + auto pb = make_problem(handle, + /*offsets=*/{0, 4, 8, 11}, + /*indices=*/{0, 1, 2, 6, 3, 4, 5, 7, 6, 7, 8}, + /*values=*/{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + /*cnst_lb=*/{2.0, 2.0, -inf}, + /*cnst_ub=*/{inf, inf, 3.0}, + /*var_lb=*/{0, 0, 0, 0, 0, 0, 0, 0, 0}, + /*var_ub=*/{1, 1, 1, 1, 1, 1, 1, 1, 10}, + /*var_types=*/{'I', 'I', 'I', 'I', 'I', 'I', 'I', 'I', 'C'}); + + auto stock_pb = run_presolve(*pb, /*ct=*/nullptr); + auto clique_pb = run_presolve(*pb, make_clique_table(n_vars, /*first=*/{{0, 1, 2}, {3, 4, 5}})); + + expect_clique_tighter_or_equal(stock_pb, clique_pb); + + EXPECT_DOUBLE_EQ(stock_pb.lb[6], 0.0); + EXPECT_DOUBLE_EQ(stock_pb.lb[7], 0.0); + EXPECT_DOUBLE_EQ(clique_pb.lb[6], 1.0); + EXPECT_DOUBLE_EQ(clique_pb.lb[7], 1.0); + EXPECT_GE(stock_pb.ub[8] + 1e-9, clique_pb.ub[8]); + EXPECT_LE(clique_pb.ub[8], 1.0 + 1e-9); + + expect_some_strict_tightening(stock_pb, clique_pb); +} + +TEST(clique_activity, monotonicity_complement_literal_clique) +{ + // c0: x0+x1-x2+y >= 1 with clique {x0,x1,~x2}: clique lifts y to 1. + const raft::handle_t handle{}; + const int n_vars = 4; + const double inf = std::numeric_limits::infinity(); + auto pb = make_problem(handle, + {0, 4}, + {0, 1, 2, 3}, + {1.0, 1.0, -1.0, 1.0}, + {1.0}, + {inf}, + {0.0, 0.0, 0.0, 0.0}, + {1.0, 1.0, 1.0, 1.0}, + {'I', 'I', 'I', 'I'}); + + auto stock_pb = run_presolve(*pb, /*ct=*/nullptr); + auto clique_pb = run_presolve(*pb, make_clique_table(n_vars, /*first=*/{{0, 1, n_vars + 2}})); + + expect_clique_tighter_or_equal(stock_pb, clique_pb); + EXPECT_DOUBLE_EQ(stock_pb.lb[3], 0.0); + EXPECT_DOUBLE_EQ(clique_pb.lb[3], 1.0); + expect_some_strict_tightening(stock_pb, clique_pb); +} + +TEST(clique_activity, monotonicity_holds_when_clique_is_noop) +{ + // Reuse the "no-op clique" shape: the clique structure exists but the + // problem cannot tighten further on any variable. The invariant must + // still hold trivially (everything matches), and no LOOSENING must occur + // on any var/cnst as a side effect of the corrections being applied and + // then undone by the per-cnst stock-minus-true peel-off in + // update_bounds_per_cnst_cliq. + const raft::handle_t handle{}; + const double inf = std::numeric_limits::infinity(); + auto pb = make_problem( + handle, {0, 2}, {0, 1}, {1.0, 1.0}, {-inf}, {1.0}, {0.0, 0.0}, {1.0, 1.0}, {'I', 'I'}); + + auto stock_pb = run_presolve(*pb, /*ct=*/nullptr); + auto clique_pb = run_presolve(*pb, make_clique_table(/*n_vars=*/2, /*first=*/{{0, 1}})); + + expect_clique_tighter_or_equal(stock_pb, clique_pb); + + // No-op problem: drift would mean the kernel is over-applying corrections. + ASSERT_EQ(stock_pb.lb.size(), clique_pb.lb.size()); + for (std::size_t i = 0; i < stock_pb.lb.size(); ++i) { + EXPECT_DOUBLE_EQ(stock_pb.lb[i], clique_pb.lb[i]) << "lb drift at var " << i; + EXPECT_DOUBLE_EQ(stock_pb.ub[i], clique_pb.ub[i]) << "ub drift at var " << i; + } +} + +} // namespace + +// Real-instance monotonicity tests on small binary-heavy MPS files in +// datasets/mip/. Tolerance 1e-6 absorbs FP drift over many iterations. + +namespace { + +struct mps_problem_with_cliques_t { + std::shared_ptr> pb; + std::shared_ptr> ct; +}; + +// Parse MPS, build preprocessed problem, build production clique table. +mps_problem_with_cliques_t load_mps_with_cliques(const raft::handle_t& handle, + const std::string& rel_mps_path) +{ + const auto abs_path = make_path_absolute(rel_mps_path); + auto model = cuopt::mps_parser::parse_mps(abs_path, false); + handle.sync_stream(); + + auto op = mps_data_model_to_optimization_problem(&handle, model); + auto pb = std::make_shared>(op); + pb->preprocess_problem(); + + // build_clique_table operates on dual_simplex::user_problem_t. + dual_simplex::user_problem_t host_problem(pb->handle_ptr); + pb->get_host_user_problem(host_problem); + + // min_clique_size=1 (vs. production default 512) so small test instances + // actually emit cliques. + detail::clique_config_t clique_config{}; + clique_config.min_clique_size = 1; + auto ct = std::make_shared>( + /*n_vertices=*/2 * host_problem.num_cols, + clique_config.min_clique_size, + clique_config.max_clique_size_for_extension); + + mip_solver_settings_t settings{}; + cuopt::timer_t timer(std::numeric_limits::infinity()); + detail::build_clique_table(host_problem, + *ct, + settings.tolerances, + /*remove_small_cliques=*/true, + /*extend=*/true, + timer); + return {std::move(pb), std::move(ct)}; +} + +// Run stock then clique-aware presolve and assert monotonicity (tol=1e-6). +void run_real_mps_monotonicity(const std::string& rel_mps_path) +{ + const raft::handle_t handle{}; + auto bundle = load_mps_with_cliques(handle, rel_mps_path); + + auto stock_pb = run_presolve(*bundle.pb, /*ct=*/nullptr); + auto clique_pb = run_presolve(*bundle.pb, bundle.ct); + + expect_clique_tighter_or_equal(stock_pb, clique_pb, /*tol=*/1e-6); +} + +} // namespace + +// Tiny pure-binary set-covering smoke test. +TEST(clique_activity, monotonicity_real_dominating_set) +{ + run_real_mps_monotonicity("mip/dominating_set.mps"); +} + +// Sudoku: clique-rich set-packing structure exercises dense-clique path. +TEST(clique_activity, monotonicity_real_sudoku) { run_real_mps_monotonicity("mip/sudoku.mps"); } + +// Coding-theory instance with many short pairwise conflicts. +TEST(clique_activity, monotonicity_real_cod105_max) +{ + run_real_mps_monotonicity("mip/cod105_max.mps"); +} + +// Mixed binary cliques + a continuous column. +TEST(clique_activity, monotonicity_real_50v_10_free_bound) +{ + run_real_mps_monotonicity("mip/50v-10-free-bound.mps"); +} + +} // namespace cuopt::linear_programming::test diff --git a/cpp/tests/mip/cuts_test.cu b/cpp/tests/mip/cuts_test.cu index 33b5457dfe..ce8aff0bcb 100644 --- a/cpp/tests/mip/cuts_test.cu +++ b/cpp/tests/mip/cuts_test.cu @@ -1091,13 +1091,16 @@ TEST(cuts, clique_phase1_remove_small_cliques_preserves_addtl_conflicts) { const raft::handle_t handle{}; auto problem = create_weighted_addtl_conflict_problem(); - // Force base clique {x2,x3} to be considered "small" and removed. - auto clique_table = build_clique_table_for_model_with_min_size(handle, problem, 2); + // Force base clique {x2,x3} (size 2) to be considered "small" and removed. + // `remove_small_cliques` drops cliques whose size is strictly less than + // `min_clique_size`, so 3 is the smallest threshold that demotes a size-2 + // base clique into pairwise edges. + auto clique_table = build_clique_table_for_model_with_min_size(handle, problem, 3); EXPECT_TRUE(clique_table.first.empty()); EXPECT_TRUE(clique_table.addtl_cliques.empty()); - // Conflicts must remain materialized in adj_list_small_cliques after removals. + // Conflicts must remain materialized in small_clique_adj after removals. EXPECT_TRUE(clique_table.check_adjacency(1, 3)); EXPECT_TRUE(clique_table.check_adjacency(3, 1)); EXPECT_TRUE(clique_table.check_adjacency(2, 3)); @@ -1275,8 +1278,10 @@ TEST(cuts, clique_neos8_phase1_addtl_suffix_conflicts_materialized) TEST(cuts, clique_neos8_phase1_symmetry_and_degree_cache_consistency) { - auto& clique_table = get_neos8_clique_table_cached(); - const int n_vertices = static_cast(clique_table.var_clique_map_first.size()); + auto& clique_table = get_neos8_clique_table_cached(); + // var_clique_first.offsets has size n_vertices + 1; the literal-vertex + // space is [0, 2 * n_variables). + const int n_vertices = 2 * clique_table.n_variables; ASSERT_GT(n_vertices, 0); const int sample_size = std::min(n_vertices, 24); @@ -1302,9 +1307,7 @@ TEST(cuts, clique_neos8_phase1_symmetry_and_degree_cache_consistency) } } -// Disabled: hits time limit on ARM (L4) instead of Optimal. -// https://github.com/NVIDIA/cuopt/issues/972 -TEST(cuts, DISABLED_clique_neos8_phase2_no_cut_off_optimal_solution_validation) +TEST(cuts, clique_neos8_phase2_no_cut_off_optimal_solution_validation) { auto& no_cut_mip = get_neos8_optimal_solution_no_cuts_cached(); ASSERT_EQ(no_cut_mip.status, mip_termination_status_t::Optimal); @@ -1347,9 +1350,7 @@ TEST(cuts, clique_neos8_phase3_fractional_separation_must_cut_off) } } -// Disabled: depends on phase2 cached result which fails on ARM (L4). -// https://github.com/NVIDIA/cuopt/issues/972 -TEST(cuts, DISABLED_clique_neos8_phase4_fault_isolation_binary_search) +TEST(cuts, clique_neos8_phase4_fault_isolation_binary_search) { auto& no_cut_mip = get_neos8_optimal_solution_no_cuts_cached(); ASSERT_EQ(no_cut_mip.status, mip_termination_status_t::Optimal);