Dual simplex performance improvements #1043
Conversation
…ering, A row nonbasic end, dynamic refactorization. 1.16X on NETLIB
```cpp
reliability_branching_settings_t<i_t, f_t> reliability_branching_settings;
```

```cpp
csc_matrix_t<i_t, f_t> AT;  // Transpose of the constraint matrix A
csr_matrix_t<i_t, f_t> Arow;
```
Is it an invariant that all the nonbasic coefficients appear first, or only in some contexts? Consider documenting. I'm also wondering if there's scope for a helper struct to group Arow with nonbasic_end since nonbasic_end is a property of the stored representation of the matrix.
In order to use the compute_delta_z function, the nonbasic coefficients must appear first. I can add some documentation and consider grouping with a struct.
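For illustration, a minimal sketch of such a grouping (the struct name and accessors are hypothetical; csr_matrix_t is the type from the diff below):

```cpp
// Hypothetical helper that keeps the CSR matrix together with the invariant it
// depends on: in each row i, entries for nonbasic variables occupy positions
// Arow.row_start[i] .. nonbasic_end[i]; basic entries follow after.
template <typename i_t, typename f_t>
struct partitioned_row_matrix_t {
  csr_matrix_t<i_t, f_t> Arow;    // row-compressed copy of A
  std::vector<i_t> nonbasic_end;  // inclusive end of the nonbasic segment per row

  i_t nonbasic_begin(i_t i) const { return Arow.row_start[i]; }
  i_t nonbasic_last(i_t i) const { return nonbasic_end[i]; }  // inclusive
};
```

Passing one object around would keep the matrix and its segment bounds from drifting apart.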
```diff
@@ -143,29 +143,24 @@ void compute_delta_z(const csc_matrix_t<i_t, f_t>& A_transpose,
   // delta_zN = - N'*delta_y
   const i_t nz_delta_y = delta_y.i.size();
   size_t nnz_processed = 0;
-  size_t nonbasic_marked = 0;
   for (i_t k = 0; k < nz_delta_y; k++) {
     const i_t i = delta_y.i[k];
     const f_t delta_y_i = delta_y.x[k];
     if (std::abs(delta_y_i) < 1e-12) { continue; }
-    const i_t row_start = A_transpose.col_start[i];
-    const i_t row_end = A_transpose.col_start[i + 1];
+    const i_t row_start = Arow.row_start[i];
+    const i_t row_end = nonbasic_end[i] + 1;
     nnz_processed += row_end - row_start;
     for (i_t p = row_start; p < row_end; ++p) {
-      const i_t j = A_transpose.i[p];
-      if (nonbasic_mark[j] >= 0) {
-        delta_z[j] -= delta_y_i * A_transpose.x[p];
-        nonbasic_marked++;
-        if (!delta_z_mark[j]) {
-          delta_z_mark[j] = 1;
-          delta_z_indices.push_back(j);
-        }
+      const i_t j = Arow.j[p];
+      delta_z[j] -= delta_y_i * Arow.x[p];
+      if (!delta_z_mark[j]) {
+        delta_z_mark[j] = 1;
+        delta_z_indices.push_back(j);
       }
     }
   }
   work_estimate += 4 * nz_delta_y;
-  work_estimate += 2 * nnz_processed;
-  work_estimate += 3 * nonbasic_marked;
+  work_estimate += 4 * nnz_processed;
   work_estimate += 2 * delta_z_indices.size();

   // delta_zB = sigma*ei
@@ -205,6 +200,102 @@ void compute_delta_z(const csc_matrix_t<i_t, f_t>& A_transpose,
 #endif
 }
```
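To make the layout change concrete, here is a minimal sketch (a hypothetical helper, not code from the PR) of the branch-free scan the CSR nonbasic segment enables; under the old CSC-transpose layout the same loop had to filter every entry through nonbasic_mark:

```cpp
template <typename i_t, typename f_t>
void scan_row_nonbasics(const csr_matrix_t<i_t, f_t>& Arow,
                        const std::vector<i_t>& nonbasic_end,
                        i_t i)
{
  // All nonbasic entries of row i are contiguous, so no per-entry test is needed.
  for (i_t p = Arow.row_start[i]; p <= nonbasic_end[i]; ++p) {
    const i_t j = Arow.j[p];  // column (variable) index
    const f_t v = Arow.x[p];  // coefficient A(i, j)
    (void)j;
    (void)v;  // a real caller would update delta_z[j] here
  }
}
```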
```cpp
template <typename i_t, typename f_t>
void compute_initial_nonbasic_end(const std::vector<i_t>& basic_mark,
                                  csr_matrix_t<i_t, f_t>& Arow,
                                  std::vector<i_t>& nonbasic_end)
{
  i_t m = Arow.m;
  // Swap coefficients so that all coefficients for nonbasic variables in row i
  // appear from Arow.row_start[i] to nonbasic_end[i]
  for (i_t i = 0; i < m; i++) {
    i_t lo = Arow.row_start[i];
    i_t hi = Arow.row_start[i + 1] - 1;
    while (lo <= hi) {
      const i_t j = Arow.j[lo];
      if (basic_mark[j] >= 0) {
        // Swap coefficients
        std::swap(Arow.j[lo], Arow.j[hi]);
        std::swap(Arow.x[lo], Arow.x[hi]);
        hi--;
      } else {
        lo++;
      }
    }
    nonbasic_end[i] = hi;
  }
}
```
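For reference, a minimal sketch of a debug check for the partition invariant this establishes (the function is hypothetical; it assumes, as above, that basic_mark[j] >= 0 marks basic variables):

```cpp
// Verify that every row is partitioned: nonbasic entries first (up to and
// including nonbasic_end[i]), basic entries afterwards.
template <typename i_t, typename f_t>
bool check_nonbasic_partition(const std::vector<i_t>& basic_mark,
                              const csr_matrix_t<i_t, f_t>& Arow,
                              const std::vector<i_t>& nonbasic_end)
{
  for (i_t i = 0; i < Arow.m; i++) {
    for (i_t p = Arow.row_start[i]; p < Arow.row_start[i + 1]; p++) {
      const bool is_basic  = basic_mark[Arow.j[p]] >= 0;
      const bool in_nb_seg = p <= nonbasic_end[i];
      // A basic entry inside the segment, or a nonbasic entry outside it,
      // violates the invariant.
      if (is_basic == in_nb_seg) { return false; }
    }
  }
  return true;
}
```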
```cpp
template <typename i_t, typename f_t>
void update_Arow(i_t leaving,
                 i_t entering,
                 const csc_matrix_t<i_t, f_t>& A,
                 std::vector<i_t>& row_mark,
                 std::vector<i_t>& nonbasic_end,
                 csr_matrix_t<i_t, f_t>& Arow,
                 f_t& work_estimate)
{
  // Update the Arow matrix to reflect the new basis, so
  // that the coefficients for all nonbasic variables in row i
  // appear in Arow.row_start[i] to nonbasic_end[i].
  // Swap the coefficients for the leaving and entering variables.
  // The leaving variable is now nonbasic, the entering variable is now basic.
  row_mark.clear();
  const i_t col_start_enter = A.col_start[entering];
  const i_t col_end_enter   = A.col_start[entering + 1];
  for (i_t p = col_start_enter; p < col_end_enter; p++) {
    const i_t i = A.i[p];
    row_mark.push_back(i);
  }
  work_estimate += 2 * (col_end_enter - col_start_enter);

  // Move the coefficients for the entering variable to the end of the nonbasics
  // and decrement the nonbasic count for the row
  for (i_t i : row_mark) {
    const i_t row_start = Arow.row_start[i];
    const i_t nb_end    = nonbasic_end[i];
    for (i_t p = row_start; p <= nb_end; p++) {
      const i_t j = Arow.j[p];
      if (j == entering) {
        std::swap(Arow.j[p], Arow.j[nb_end]);
        std::swap(Arow.x[p], Arow.x[nb_end]);
        nonbasic_end[i]--;
        break;
      }
    }
    work_estimate += nb_end - row_start;
  }
  work_estimate += 2 * row_mark.size();

  row_mark.clear();
  const i_t col_start_leave = A.col_start[leaving];
  const i_t col_end_leave   = A.col_start[leaving + 1];
  for (i_t p = col_start_leave; p < col_end_leave; p++) {
    const i_t i = A.i[p];
    row_mark.push_back(i);
  }
  work_estimate += 2 * (col_end_leave - col_start_leave);

  // Move the coefficient for the leaving variable to the end of the nonbasics
  // and increment the nonbasic count for the row
  for (i_t i : row_mark) {
    const i_t nb_end  = nonbasic_end[i];
    const i_t row_end = Arow.row_start[i + 1];
    for (i_t p = nb_end + 1; p < row_end; p++) {
      const i_t j = Arow.j[p];
      if (j == leaving) {
        std::swap(Arow.j[p], Arow.j[nb_end + 1]);
        std::swap(Arow.x[p], Arow.x[nb_end + 1]);
        nonbasic_end[i]++;
        break;
      }
    }
    work_estimate += row_end - nb_end;
  }
  work_estimate += 2 * row_mark.size();
}
```
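And a hedged sketch of the intended call site (the surrounding variable names are assumptions drawn from the function's parameter list):

```cpp
// After a ratio-test pivot: 'leaving' exits the basis, 'entering' joins it.
// Repair the per-row nonbasic segments before the next compute_delta_z call
// relies on them.
update_Arow(leaving, entering, A, row_mark, nonbasic_end, Arow, work_estimate);
```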
```cpp
namespace phase2 {

// Computes vectors farkas_y, farkas_zl, farkas_zu that satisfy
```

```diff
@@ -1480,15 +1571,27 @@ i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t<i_t, f_t
                                         const basis_update_mpf_t<i_t, f_t>& ft,
                                         i_t basic_leaving_index,
                                         i_t entering_index,
                                         i_t leaving_index,
                                         std::vector<f_t>& steepest_edge_norms)
 {
 #if 0
```
Why not delete this code block?
Yep. This code will all be deleted, as will the function compute_steepest_edge_norm_entering, before merging.
Note: Reviews paused

It looks like this branch is under active development. To avoid overwhelming you with review comments due to an influx of new commits, CodeRabbit has automatically paused this review. You can configure this behavior in your review settings.
📝 Walkthrough

Switched internal sparse-matrix usage from CSC-transpose to CSR row-compressed form (Arow).

Changes:

- CSR-driven delta-z / nonbasic-segment
- LU refactor / trailing-matrix LU
- PDLP presolve selection tweak

Estimated code review effort: 🎯 4 (Complex) | ⏱️ ~60 minutes

🚥 Pre-merge checks: ✅ 4 | ❌ 1

❌ Failed checks (1 warning)
✅ Passed checks (4 passed)
Actionable comments posted: 2
Caution
Some comments are outside the diff and can’t be posted inline due to platform limitations.
⚠️ Outside diff range comments (2)
cpp/src/dual_simplex/phase2.cpp (2)
3366-3409: ⚠️ Potential issue | 🟠 Major — Count the flip-adjustment solve in solve_work.

Lines 3366-3379 call phase2::adjust_for_flips(), and that helper does a ft.b_solve(...), but only the later FTRAN on lines 3386-3408 contributes to solve_work. On flip-heavy models, the line 3562 trigger will undercount solve cost and delay the refactorization it is supposed to schedule.

💡 Suggested fix

```diff
   delta_xB_0_sparse.clear();
   if (num_flipped > 0) {
     timers.start_timer();
+    f_t flip_adjust_start_work = ft.work_estimate();
     phase2::adjust_for_flips(ft, basic_list, delta_z_indices, atilde_index, atilde,
                              atilde_mark, atilde_sparse, delta_xB_0_sparse,
                              delta_x_flip, x, phase2_work_estimate);
+    solve_work += (ft.work_estimate() - flip_adjust_start_work);
     timers.ftran_time += timers.stop_timer();
   }
```

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed. In `@cpp/src/dual_simplex/phase2.cpp` around lines 3366 - 3409, The flip-adjustment FTRAN performed inside phase2::adjust_for_flips (which calls ft.b_solve) is not being added to solve_work; capture ft.work_estimate() immediately before and after calling phase2::adjust_for_flips (similar to how ftran_start_work is used later) and add their difference to solve_work so the flip-related solve cost is included when deciding refactorization.
2656-2675: ⚠️ Potential issue | 🟠 Major — Guard the work-based refactor trigger for warm-started bases.

Lines 2656-2675 only populate refactor_work in the initialization branch. When this entry point reuses an existing basis (initialize_basis == false), line 3562 compares solve_work against 0.0, so the new heuristic collapses into "refactor after 100 updates" on warm-started solves.

💡 Suggested fix

```diff
-  f_t refactor_work = 0.0;
-  f_t solve_work = 0.0;
+  f_t refactor_work = 0.0;
+  f_t solve_work = 0.0;
+  bool have_refactor_work = false;
 ...
-  refactor_work = ft.work_estimate() - refactor_start_work;
+  refactor_work = ft.work_estimate() - refactor_start_work;
+  have_refactor_work = true;
 ...
-  bool should_refactor =
-    (ft.num_updates() > 100 && solve_work > refactor_work) || (ft.num_updates() > 1000);
+  bool should_refactor =
+    (ft.num_updates() > 1000) ||
+    (have_refactor_work && ft.num_updates() > 100 && solve_work > refactor_work);
 ...
-  refactor_work = ft.work_estimate() - refactor_start_work;
+  refactor_work = ft.work_estimate() - refactor_start_work;
+  have_refactor_work = true;
```

Also applies to: 3560-3563, 3621-3650
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed. In `@cpp/src/dual_simplex/phase2.cpp` around lines 2656 - 2675, The refactor_work baseline is only set inside the initialize_basis branch so warm-started solves leave refactor_work == 0.0 and break the work-based refactor heuristic; fix by establishing a proper baseline when initialize_basis == false (e.g. assign refactor_work = ft.work_estimate() before any update/solve work) or by guarding the later work-comparison logic with initialize_basis so the work-based trigger is only evaluated when refactor_work was actually computed (refer to refactor_work, solve_work, initialize_basis, and ft.work_estimate()/ft.refactor_basis to locate the affected code).
🤖 Prompt for all review comments with AI agents
Verify each finding against the current code and only fix it if needed.
Inline comments:
In `@cpp/src/branch_and_bound/pseudo_costs.cpp`:
- Around line 1476-1492: compute_initial_nonbasic_end mutates the shared
pseudo_costs_t::Arow in-place, causing races when multiple BEST_FIRST workers
run concurrently; make a worker-local copy of Arow and use that copy for
compute_initial_nonbasic_end and all subsequent calls in this block (e.g., pass
local_Arow to compute_initial_nonbasic_end and to
single_pivot_objective_change_estimate) so nonbasic_end and the pivot estimate
operate on a stable, per-worker CSR ordering instead of the shared Arow.
In `@cpp/src/dual_simplex/phase2.cpp`:
- Around line 133-141: The CHECK_CHANGE_IN_REDUCED_COST validation path inside
compute_delta_z is stale because it references removed symbols (A_transpose, lp,
basic_list, nonbasic_list); either remove that macro block or update it to use
the current inputs (Arow, delta_y, leaving_index, direction, nonbasic_end,
delta_z_mark, delta_z_indices, delta_z) and any available LP state, ensuring the
validation only uses in-scope variables; specifically locate the
CHECK_CHANGE_IN_REDUCED_COST block in compute_delta_z and refactor or delete it
so the checker no longer references the out-of-scope identifiers.
---
Outside diff comments:
In `@cpp/src/dual_simplex/phase2.cpp`:
- Around line 3366-3409: The flip-adjustment FTRAN performed inside
phase2::adjust_for_flips (which calls ft.b_solve) is not being added to
solve_work; capture ft.work_estimate() immediately before and after calling
phase2::adjust_for_flips (similar to how ftran_start_work is used later) and add
their difference to solve_work so the flip-related solve cost is included when
deciding refactorization.
- Around line 2656-2675: The refactor_work baseline is only set inside the
initialize_basis branch so warm-started solves leave refactor_work == 0.0 and
break the work-based refactor heuristic; fix by establishing a proper baseline
when initialize_basis == false (e.g. assign refactor_work = ft.work_estimate()
before any update/solve work) or by guarding the later work-comparison logic
with initialize_basis so the work-based trigger is only evaluated when
refactor_work was actually computed (refer to refactor_work, solve_work,
initialize_basis, and ft.work_estimate()/ft.refactor_basis to locate the
affected code).
🪄 Autofix (Beta)
Fix all unresolved CodeRabbit comments on this PR:
- Push a commit to this branch (recommended)
- Create a new PR with the fixes
ℹ️ Review info
⚙️ Run configuration
Configuration used: Path: .coderabbit.yaml
Review profile: CHILL
Plan: Pro
Run ID: b1230117-8559-4b83-8b43-f42888816349
📒 Files selected for processing (5)
- cpp/src/branch_and_bound/branch_and_bound.cpp
- cpp/src/branch_and_bound/pseudo_costs.cpp
- cpp/src/branch_and_bound/pseudo_costs.hpp
- cpp/src/dual_simplex/phase2.cpp
- cpp/src/dual_simplex/phase2.hpp
Caution
Some comments are outside the diff and can’t be posted inline due to platform limitations.
⚠️ Outside diff range comments (1)
cpp/src/dual_simplex/phase2.cpp (1)
3333-3375: ⚠️ Potential issue | 🟠 Major — Count the flip-adjustment solve in solve_work.

adjust_for_flips() does a ft.b_solve(...), but only the later compute_delta_x() solve is added to solve_work. On bound-flip-heavy iterations this undercounts the new refactor metric, so should_refactor can stay false even after the basis has already spent more work in solves than in the last factorization.

Patch

```diff
   delta_xB_0_sparse.clear();
   if (num_flipped > 0) {
     timers.start_timer();
+    f_t flip_adjust_start_work = ft.work_estimate();
     phase2::adjust_for_flips(ft, basic_list, delta_z_indices, atilde_index, atilde,
                              atilde_mark, atilde_sparse, delta_xB_0_sparse,
                              delta_x_flip, x, phase2_work_estimate);
+    solve_work += (ft.work_estimate() - flip_adjust_start_work);
     timers.ftran_time += timers.stop_timer();
   }
```

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed. In `@cpp/src/dual_simplex/phase2.cpp` around lines 3333 - 3375, The flip-adjustment B-solve performed inside phase2::adjust_for_flips(ft, ...) isn't currently included in solve_work, causing undercounting; before calling phase2::adjust_for_flips capture ft.work_estimate(), call the function, then add the difference (ft.work_estimate() - prior_work) to solve_work (similar to how ftran_start_work is used around phase2::compute_delta_x) so the work from ft.b_solve during adjust_for_flips is accounted for when deciding should_refactor.
🤖 Prompt for all review comments with AI agents
Verify each finding against the current code and only fix it if needed.
Outside diff comments:
In `@cpp/src/dual_simplex/phase2.cpp`:
- Around line 3333-3375: The flip-adjustment B-solve performed inside
phase2::adjust_for_flips(ft, ...) isn't currently included in solve_work,
causing undercounting; before calling phase2::adjust_for_flips capture
ft.work_estimate(), call the function, then add the difference
(ft.work_estimate() - prior_work) to solve_work (similar to how ftran_start_work
is used around phase2::compute_delta_x) so the work from ft.b_solve during
adjust_for_flips is accounted for when deciding should_refactor.
ℹ️ Review info
⚙️ Run configuration
Configuration used: Path: .coderabbit.yaml
Review profile: CHILL
Plan: Pro Plus
Run ID: 700fb170-9e65-4296-9a09-62cd3304fb11
📒 Files selected for processing (2)
- cpp/src/branch_and_bound/pseudo_costs.cpp
- cpp/src/dual_simplex/phase2.cpp
🔔 Hi @anandhkb @chris-maes, this pull request has had no activity for 7 days. Please update or let us know if it can be closed. Thank you! If this is an "epic" issue, then please add the "epic" label to this issue.
Actionable comments posted: 1
Caution
Some comments are outside the diff and can’t be posted inline due to platform limitations.
⚠️ Outside diff range comments (1)
cpp/src/dual_simplex/phase2.cpp (1)
2622-2641: ⚠️ Potential issue | 🟠 Major | ⚡ Quick win — Seed refactor_work before using the work-based refactor trigger.

refactor_work only gets a value when initialize_basis is true. On the initialize_basis == false path, it stays 0, so once ft.num_updates() > 100 the solve_work > refactor_work branch always fires and forces a refactor with no real cost baseline.

Also applies to: 3552-3555
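A minimal sketch of the seeding this asks for (hedged; it assumes ft.work_estimate() is callable at entry, as the comment suggests):

```cpp
// Give the work-based trigger a nonzero baseline even on warm starts, so
// solve_work > refactor_work is not trivially true once num_updates() > 100.
f_t refactor_work = ft.work_estimate();
f_t solve_work    = 0.0;
```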
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the rest with a brief reason, keep changes minimal, and validate. In `@cpp/src/dual_simplex/phase2.cpp` around lines 2622 - 2641, refactor_work is left at 0.0 when initialize_basis is false, causing the work-based refactor trigger to always prefer refactoring; set a proper baseline by initializing refactor_work from the current refactorer's work estimate (e.g. refactor_work = ft.work_estimate()) before the initialize_basis conditional (or at least before any decision that compares solve_work to refactor_work), and when you actually call ft.refactor_basis update refactor_work to the delta as currently done; apply the same change to the other occurrence where refactor_work is used (the block around the second refactor decision that currently checks ft.num_updates() > 100).
🧹 Nitpick comments (1)
cpp/src/pdlp/solve.cu (1)
1428-1436: ⚡ Quick win — Document and centralize the presolve threshold contract.

Line 1428 introduces a hardcoded algorithm-selection threshold (8000) that changes presolver_t::Default behavior at runtime. Please move this to a named solver constant/setting and document the behavior (including the Default mapping) in the solver settings docs to keep behavior reproducible and aligned with API semantics.

As per coding guidelines: **/*.{cpp,cuh,cu,hpp,py}: Flag missing documentation for numerical tolerances and algorithm parameters.
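A hedged sketch of the centralization being requested (the constant name and settings plumbing are hypothetical; presolver_t::Default, presolver_t::PSLP, and op_problem.get_nnz() are taken from the review's own prompt):

```cpp
// Named, documented threshold instead of a magic number.
constexpr int kPresolveNnzThreshold = 8000;

// Default presolver resolves to PSLP only for sufficiently large problems.
if (settings.presolver == presolver_t::Default &&
    op_problem.get_nnz() >= kPresolveNnzThreshold) {
  settings.presolver = presolver_t::PSLP;
}
```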
Verify each finding against current code. Fix only still-valid issues, skip the rest with a brief reason, keep changes minimal, and validate. In `@cpp/src/pdlp/solve.cu` around lines 1428 - 1436, Replace the hardcoded presolve_nnz_threshold (8000) with a named solver constant or a settings field (e.g., SolverSettings::presolve_nnz_threshold or kPresolveNnzThreshold) and use that constant where currently referenced; ensure the runtime mapping from presolver_t::Default to presolver_t::PSLP when op_problem.get_nnz() >= that threshold is documented in the solver settings docs and the API comments (mentioning presolver_t::Default, presolver_t::PSLP, settings.presolver and op_problem.get_nnz()), and add a note in the numerical/algorithm parameters section describing the threshold contract and how to override it via settings.
🤖 Prompt for all review comments with AI agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.
Inline comments:
In `@cpp/src/dual_simplex/right_looking_lu.cpp`:
- Around line 1505-1584: The loop never accounts for the heavy internal work
done inside trailing_matrix_t, so ensure the trailing-matrix work is added into
the outer work_estimate: either (preferred) add a public accessor on
trailing_matrix_t (e.g. trailing_matrix.work_estimate() or
trailing_matrix.estimate_work()) that returns its accumulated internal cost and
call work_estimate += trailing_matrix.work_estimate() at an appropriate point in
the pivot loop (for example right after trailing_matrix.garbage_collect()), or
alternatively have the heavy methods (trailing_matrix.cache_pivot_row,
extract_row, extract_column, schur_complement, update_for_pivot_removal,
garbage_collect) return their individual cost and add those returns to
work_estimate where they are invoked; make sure to reference
trailing_matrix_t/trailing_matrix and the local work_estimate variable when
implementing the change.
---
Outside diff comments:
In `@cpp/src/dual_simplex/phase2.cpp`:
- Around line 2622-2641: refactor_work is left at 0.0 when initialize_basis is
false, causing the work-based refactor trigger to always prefer refactoring; set
a proper baseline by initializing refactor_work from the current refactorer's
work estimate (e.g. refactor_work = ft.work_estimate()) before the
initialize_basis conditional (or at least before any decision that compares
solve_work to refactor_work), and when you actually call ft.refactor_basis
update refactor_work to the delta as currently done; apply the same change to
the other occurrence where refactor_work is used (the block around the second
refactor decision that currently checks ft.num_updates() > 100).
---
Nitpick comments:
In `@cpp/src/pdlp/solve.cu`:
- Around line 1428-1436: Replace the hardcoded presolve_nnz_threshold (8000)
with a named solver constant or a settings field (e.g.,
SolverSettings::presolve_nnz_threshold or kPresolveNnzThreshold) and use that
constant where currently referenced; ensure the runtime mapping from
presolver_t::Default to presolver_t::PSLP when op_problem.get_nnz() >= that
threshold is documented in the solver settings docs and the API comments
(mentioning presolver_t::Default, presolver_t::PSLP, settings.presolver and
op_problem.get_nnz()), and add a note in the numerical/algorithm parameters
section describing the threshold contract and how to override it via settings.
🪄 Autofix (Beta)
Fix all unresolved CodeRabbit comments on this PR:
- Push a commit to this branch (recommended)
- Create a new PR with the fixes
ℹ️ Review info
⚙️ Run configuration
Configuration used: Path: .coderabbit.yaml
Review profile: CHILL
Plan: Enterprise
Run ID: 83191bb9-bc00-4db2-aaaa-1b53a175ecd4
📒 Files selected for processing (6)
- cpp/src/branch_and_bound/branch_and_bound.cpp
- cpp/src/dual_simplex/basis_solves.cpp
- cpp/src/dual_simplex/phase2.cpp
- cpp/src/dual_simplex/right_looking_lu.cpp
- cpp/src/dual_simplex/right_looking_lu.hpp
- cpp/src/pdlp/solve.cu
🚧 Files skipped from review as they are similar to previous changes (1)
- cpp/src/branch_and_bound/branch_and_bound.cpp
…ce (rather than per fill)
Actionable comments posted: 1
♻️ Duplicate comments (1)
cpp/src/dual_simplex/right_looking_lu.cpp (1)
1521-1600: ⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift — Fold trailing-matrix work into the returned work_estimate.

right_looking_lu2 only bumps the outer assembly cost here, while the new trailing-matrix path keeps its own work counter internally and never contributes it back to the returned metric. That makes this factorization look artificially cheap to the new solve-vs-refactor heuristic.

🤖 Prompt for AI Agents
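A minimal sketch of the fold being requested (the take_work_estimate() accessor on trailing_matrix_t is hypothetical):

```cpp
// Inside the pivot loop of right_looking_lu2, once the trailing-matrix update
// for this pivot has completed:

// Hypothetical accessor that returns and resets the internally accumulated
// cost, so the outer metric sees the trailing-matrix work exactly once.
work_estimate += trailing_matrix.take_work_estimate();
```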
Verify each finding against current code. Fix only still-valid issues, skip the rest with a brief reason, keep changes minimal, and validate. In `@cpp/src/dual_simplex/right_looking_lu.cpp` around lines 1521 - 1600, The trailing_matrix path maintains its own internal work counter which is never added to the function-level work_estimate, so factorization appears artificially cheap; update right_looking_lu2 to fold that internal work into the returned work_estimate by reading the trailing_matrix's work counter and adding it into work_estimate (either after each trailing_matrix call or once per pivot iteration). Locate the trailing_matrix_t instance named trailing_matrix and its methods (markowitz_search, cache_pivot_row, extract_row, extract_column, update_for_pivot_removal, schur_complement, remove_pivot_row_and_column, garbage_collect) and add code to accumulate trailing_matrix's internal work (expose a getter like trailing_matrix.get_work() or read the public counter) into the local work_estimate so the function returns the combined cost.
🧹 Nitpick comments (1)
cpp/src/dual_simplex/right_looking_lu.cpp (1)
1508: ⚡ Quick win — Use a distinct NVTX range name for right_looking_lu2.

This still emits "LU::right_looking_lu", so profiler traces collapse the old and new factorization paths into the same label and make this perf change harder to attribute.

Proposed fix

```diff
-  raft::common::nvtx::range scope("LU::right_looking_lu");
+  raft::common::nvtx::range scope("LU::right_looking_lu2");
```

As per coding guidelines: "Flag misleading names that hide GPU/CPU boundaries, units, or problem-space context".
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the rest with a brief reason, keep changes minimal, and validate. In `@cpp/src/dual_simplex/right_looking_lu.cpp` at line 1508, The NVTX range in the right_looking_lu2 code path is reusing the string "LU::right_looking_lu", which collides with the original factorization trace; update the NVTX range invocation inside the right_looking_lu2 function (the line creating raft::common::nvtx::range scope("LU::right_looking_lu")) to use a distinct, descriptive label (e.g., "LU::right_looking_lu2" or "LU::right_looking_right_looking2") so profiler traces for right_looking_lu and right_looking_lu2 do not collapse.
🤖 Prompt for all review comments with AI agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.
Inline comments:
In `@cpp/src/dual_simplex/right_looking_lu.cpp`:
- Around line 1512-1518: Replace the runtime-only assert checks for
matrix/vector shapes with CUOPT_EXPECTS so validation remains in release builds:
change each assert(A.m == n), assert(L.n == n), assert(L.m == n), assert(U.n ==
n), assert(U.m == n), assert(q.size() == n), assert(pinv.size() == n) into
CUOPT_EXPECTS(...) calls that validate the same conditions and provide a clear
error message (e.g., "A.m must equal n", "L.n must equal n", etc.); ensure
cuopt/error.hpp is included if not already so CUOPT_EXPECTS is available and
keep the checks referencing the same symbols A, L, U, q, pinv and n to locate
the correct spot in right_looking_lu.cpp.
---
Duplicate comments:
In `@cpp/src/dual_simplex/right_looking_lu.cpp`:
- Around line 1521-1600: The trailing_matrix path maintains its own internal
work counter which is never added to the function-level work_estimate, so
factorization appears artificially cheap; update right_looking_lu2 to fold that
internal work into the returned work_estimate by reading the trailing_matrix's
work counter and adding it into work_estimate (either after each trailing_matrix
call or once per pivot iteration). Locate the trailing_matrix_t instance named
trailing_matrix and its methods (markowitz_search, cache_pivot_row, extract_row,
extract_column, update_for_pivot_removal, schur_complement,
remove_pivot_row_and_column, garbage_collect) and add code to accumulate
trailing_matrix's internal work (expose a getter like trailing_matrix.get_work()
or read the public counter) into the local work_estimate so the function returns
the combined cost.
---
Nitpick comments:
In `@cpp/src/dual_simplex/right_looking_lu.cpp`:
- Line 1508: The NVTX range in the right_looking_lu2 code path is reusing the
string "LU::right_looking_lu", which collides with the original factorization
trace; update the NVTX range invocation inside the right_looking_lu2 function
(the line creating raft::common::nvtx::range scope("LU::right_looking_lu")) to
use a distinct, descriptive label (e.g., "LU::right_looking_lu2" or
"LU::right_looking_right_looking2") so profiler traces for right_looking_lu and
right_looking_lu2 do not collapse.
🪄 Autofix (Beta)
Fix all unresolved CodeRabbit comments on this PR:
- Push a commit to this branch (recommended)
- Create a new PR with the fixes
ℹ️ Review info
⚙️ Run configuration
Configuration used: Path: .coderabbit.yaml
Review profile: CHILL
Plan: Enterprise
Run ID: daef7416-b08c-4a3d-8b34-259c09cf4d7f
📒 Files selected for processing (1)
cpp/src/dual_simplex/right_looking_lu.cpp
```cpp
assert(A.m == n);
assert(L.n == n);
assert(L.m == n);
assert(U.n == n);
assert(U.m == n);
assert(q.size() == n);
assert(pinv.size() == n);
```
Replace these assert preconditions with CUOPT_EXPECTS.
On release builds these checks disappear, so a bad A/L/U/q/pinv shape can fall through into unchecked writes to the output buffers.
As per coding guidelines "Use throw and cuopt_expects(...) / CUOPT_EXPECTS(...) macros from cpp/include/cuopt/error.hpp for error handling" and "Flag missing input validation at library and server boundaries".
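For illustration, a hedged sketch of the conversion (the exact CUOPT_EXPECTS argument list is an assumption; see cpp/include/cuopt/error.hpp for the real macro):

```cpp
#include <cuopt/error.hpp>

// Shape preconditions that survive release builds, unlike assert().
CUOPT_EXPECTS(A.m == n, "A.m must equal n");
CUOPT_EXPECTS(L.m == n && L.n == n, "L must be n x n");
CUOPT_EXPECTS(U.m == n && U.n == n, "U must be n x n");
CUOPT_EXPECTS(q.size() == static_cast<std::size_t>(n), "q must have length n");
CUOPT_EXPECTS(pinv.size() == static_cast<std::size_t>(n), "pinv must have length n");
```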
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.
In `@cpp/src/dual_simplex/right_looking_lu.cpp` around lines 1512 - 1518, Replace
the runtime-only assert checks for matrix/vector shapes with CUOPT_EXPECTS so
validation remains in release builds: change each assert(A.m == n), assert(L.n
== n), assert(L.m == n), assert(U.n == n), assert(U.m == n), assert(q.size() ==
n), assert(pinv.size() == n) into CUOPT_EXPECTS(...) calls that validate the
same conditions and provide a clear error message (e.g., "A.m must equal n",
"L.n must equal n", etc.); ensure cuopt/error.hpp is included if not already so
CUOPT_EXPECTS is available and keep the checks referencing the same symbols A,
L, U, q, pinv and n to locate the correct spot in right_looking_lu.cpp.
This PR includes three performance improvements to dual simplex