Skip to content

Commit 76df05a

Browse files
committed
Merge branch 'main' of github.com:NVIDIA/cuopt into fix_timer
2 parents b58edfc + 9ad8b45 commit 76df05a

16 files changed

Lines changed: 375 additions & 201 deletions

File tree

benchmarks/linear_programming/cuopt/run_pdlp.cu

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,23 @@ static void parse_arguments(argparse::ArgumentParser& program)
7070
"Path to PDLP hyper-params file to configure PDLP solver. Has priority over PDLP solver "
7171
"modes.");
7272

73-
program.add_argument("--presolve")
74-
.help("enable/disable presolve (default: true for MIP problems, false for LP problems)")
75-
.default_value(0)
76-
.scan<'i', int>()
77-
.choices(0, 1);
73+
program.add_argument("--presolver")
74+
.help("Presolver to use. Possible values: None, Papilo, PSLP, Default")
75+
.default_value("Default")
76+
.choices("None", "Papilo", "PSLP", "Default");
7877

7978
program.add_argument("--solution-path").help("Path where solution file will be generated");
8079
}
8180

81+
static cuopt::linear_programming::presolver_t string_to_presolver(const std::string& presolver)
82+
{
83+
if (presolver == "None") return cuopt::linear_programming::presolver_t::None;
84+
if (presolver == "Papilo") return cuopt::linear_programming::presolver_t::Papilo;
85+
if (presolver == "PSLP") return cuopt::linear_programming::presolver_t::PSLP;
86+
if (presolver == "Default") return cuopt::linear_programming::presolver_t::Default;
87+
return cuopt::linear_programming::presolver_t::Default;
88+
}
89+
8290
static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode(
8391
const std::string& mode)
8492
{
@@ -107,7 +115,7 @@ static cuopt::linear_programming::pdlp_solver_settings_t<int, double> create_sol
107115
string_to_pdlp_solver_mode(program.get<std::string>("--pdlp-solver-mode"));
108116
settings.method = static_cast<cuopt::linear_programming::method_t>(program.get<int>("--method"));
109117
settings.crossover = program.get<int>("--crossover");
110-
settings.presolve = program.get<int>("--presolve");
118+
settings.presolver = string_to_presolver(program.get<std::string>("--presolver"));
111119

112120
return settings;
113121
}

cpp/src/branch_and_bound/pseudo_costs.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,8 +344,9 @@ void strong_branching(const user_problem_t<i_t, f_t>& original_problem,
344344
if (batch_remaining_time <= 0.0) { return; }
345345
pdlp_solver_settings_t<i_t, f_t> pdlp_settings;
346346
pdlp_settings.time_limit = batch_remaining_time;
347-
const auto solutions = batch_pdlp_solve(
348-
original_problem.handle_ptr, mps_model, fractional, fraction_values, pdlp_settings);
347+
const raft::handle_t batch_pdlp_handle;
348+
const auto solutions =
349+
batch_pdlp_solve(&batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings);
349350
f_t batch_pdlp_strong_branching_time = toc(start_batch);
350351

351352
// Find max iteration on how many are done across the batch

cpp/src/pdlp/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ set(LP_CORE_FILES
2424
${CMAKE_CURRENT_SOURCE_DIR}/termination_strategy/infeasibility_information.cu
2525
${CMAKE_CURRENT_SOURCE_DIR}/termination_strategy/convergence_information.cu
2626
${CMAKE_CURRENT_SOURCE_DIR}/optimal_batch_size_handler/optimal_batch_size_handler.cu
27+
${CMAKE_CURRENT_SOURCE_DIR}/utilities/ping_pong_graph.cu
2728
)
2829

2930
# C and Python adapter files

cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ void pdlp_initial_scaling_strategy_t<i_t, f_t>::scale_problem()
545545
#ifdef CUPDLP_DEBUG_MODE
546546
print("constraint_lower_bound", op_problem_scaled_.constraint_lower_bounds);
547547
print("constraint_upper_bound", op_problem_scaled_.constraint_upper_bounds);
548-
std::vector<f_t2> variable_bounds = host_copy(op_problem_scaled_.variable_bounds);
548+
std::vector<f_t2> variable_bounds = host_copy(op_problem_scaled_.variable_bounds, stream_view_);
549549
std::vector<f_t> lower_bounds;
550550
std::vector<f_t> upper_bounds;
551551
for (const auto& variable_bound : variable_bounds) {

cpp/src/pdlp/pdhg.cu

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,12 @@ struct primal_reflected_major_projection_bulk_op {
567567
const f_t obj_coef = objective_coefficients[var_idx];
568568
const f_t aty_val = current_AtY[idx];
569569

570+
cuopt_assert(!isnan(step_size), "primal_step_size is NaN in primal_reflected_major_projection");
571+
cuopt_assert(!isinf(step_size), "primal_step_size is Inf in primal_reflected_major_projection");
572+
cuopt_assert(step_size > f_t(0.0), "primal_step_size must be > 0");
573+
cuopt_assert(!isnan(primal_val), "primal_solution is NaN in primal_reflected_major_projection");
574+
cuopt_assert(!isnan(aty_val), "current_AtY is NaN in primal_reflected_major_projection");
575+
570576
const f_t next = primal_val - step_size * (obj_coef - aty_val);
571577

572578
const f_t2 bounds = variable_bounds[var_idx];
@@ -576,6 +582,9 @@ struct primal_reflected_major_projection_bulk_op {
576582
potential_next_primal[idx] = next_clamped;
577583
dual_slack[idx] = (next_clamped - next) / step_size;
578584
reflected_primal[idx] = f_t(2.0) * next_clamped - primal_val;
585+
586+
cuopt_assert(!isnan(reflected_primal[idx]),
587+
"reflected_primal is NaN after primal_reflected_major_projection");
579588
}
580589
};
581590

@@ -599,6 +608,12 @@ struct dual_reflected_major_projection_bulk_op {
599608
const f_t current_dual = dual_solution[idx];
600609
const f_t Ax = dual_gradient[idx];
601610

611+
cuopt_assert(!isnan(step_size), "dual_step_size is NaN in dual_reflected_major_projection");
612+
cuopt_assert(!isinf(step_size), "dual_step_size is Inf in dual_reflected_major_projection");
613+
cuopt_assert(step_size > f_t(0.0), "dual_step_size must be > 0");
614+
cuopt_assert(!isnan(current_dual), "dual_solution is NaN in dual_reflected_major_projection");
615+
cuopt_assert(!isnan(Ax), "dual_gradient is NaN in dual_reflected_major_projection");
616+
602617
const f_t tmp = current_dual / step_size - Ax;
603618
const f_t tmp_proj =
604619
cuda::std::max<f_t>(-constraint_upper_bounds[constraint_idx],
@@ -607,6 +622,9 @@ struct dual_reflected_major_projection_bulk_op {
607622

608623
potential_next_dual[idx] = next_dual;
609624
reflected_dual[idx] = f_t(2.0) * next_dual - current_dual;
625+
626+
cuopt_assert(!isnan(reflected_dual[idx]),
627+
"reflected_dual is NaN after dual_reflected_major_projection");
610628
}
611629
};
612630

@@ -631,12 +649,21 @@ struct primal_reflected_projection_bulk_op {
631649
const f_t obj_coef = objective_coefficients[var_idx];
632650
const f_t aty_val = current_AtY[idx];
633651

652+
cuopt_assert(!isnan(step_size), "primal_step_size is NaN in primal_reflected_projection");
653+
cuopt_assert(!isnan(primal_val), "primal_solution is NaN in primal_reflected_projection");
654+
cuopt_assert(!isnan(aty_val), "current_AtY is NaN in primal_reflected_projection");
655+
cuopt_assert(!isinf(step_size), "primal_step_size is Inf in primal_reflected_projection");
656+
cuopt_assert(step_size > f_t(0.0), "primal_step_size must be > 0");
657+
634658
f_t reflected = primal_val - step_size * (obj_coef - aty_val);
635659

636660
const f_t2 bounds = variable_bounds[var_idx];
637661
reflected = cuda::std::max(cuda::std::min(reflected, get_upper(bounds)), get_lower(bounds));
638662

639663
reflected_primal[idx] = f_t(2.0) * reflected - primal_val;
664+
665+
cuopt_assert(!isnan(reflected_primal[idx]),
666+
"reflected_primal is NaN after primal_reflected_projection");
640667
}
641668
};
642669

@@ -659,13 +686,23 @@ struct dual_reflected_projection_bulk_op {
659686

660687
const f_t step_size = dual_step_size[batch_idx];
661688
const f_t current_dual = dual_solution[idx];
662-
const f_t tmp = current_dual / step_size - dual_gradient[idx];
689+
690+
cuopt_assert(!isnan(step_size), "dual_step_size is NaN in dual_reflected_projection");
691+
cuopt_assert(!isnan(current_dual), "dual_solution is NaN in dual_reflected_projection");
692+
cuopt_assert(!isnan(dual_gradient[idx]), "dual_gradient is NaN in dual_reflected_projection");
693+
cuopt_assert(!isinf(step_size), "dual_step_size is Inf in dual_reflected_projection");
694+
cuopt_assert(step_size > f_t(0.0), "dual_step_size must be > 0");
695+
696+
const f_t tmp = current_dual / step_size - dual_gradient[idx];
663697
const f_t tmp_proj =
664698
cuda::std::max<f_t>(-constraint_upper_bounds[constraint_idx],
665699
cuda::std::min<f_t>(tmp, -constraint_lower_bounds[constraint_idx]));
666700
const f_t next_dual = (tmp - tmp_proj) * step_size;
667701

668702
reflected_dual[idx] = f_t(2.0) * next_dual - current_dual;
703+
704+
cuopt_assert(!isnan(reflected_dual[idx]),
705+
"reflected_dual is NaN after dual_reflected_projection");
669706
}
670707
};
671708

cpp/src/pdlp/pdlp.cu

Lines changed: 81 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535

3636
#include <thrust/count.h>
3737
#include <thrust/extrema.h>
38+
#include <thrust/logical.h>
3839

40+
#include <cmath>
3941
#include <optional>
4042
#include <unordered_set>
4143

@@ -1186,24 +1188,55 @@ static void compute_stats(const rmm::device_uvector<f_t>& vec,
11861188
f_t& avg)
11871189
{
11881190
auto abs_op = [] __host__ __device__(f_t x) { return abs(x); };
1189-
auto min_nonzero = [] __host__ __device__(f_t x) {
1190-
return x == 0 ? std::numeric_limits<f_t>::max() : abs(x);
1191-
};
1192-
1193-
smallest = thrust::transform_reduce(rmm::exec_policy(vec.stream()),
1194-
vec.begin(),
1195-
vec.end(),
1196-
min_nonzero,
1197-
std::numeric_limits<f_t>::max(),
1198-
thrust::minimum<f_t>());
1199-
1200-
largest = thrust::transform_reduce(
1201-
rmm::exec_policy(vec.stream()), vec.begin(), vec.end(), abs_op, 0.0f, thrust::maximum<f_t>());
1202-
1203-
f_t sum = thrust::transform_reduce(
1204-
rmm::exec_policy(vec.stream()), vec.begin(), vec.end(), abs_op, 0.0f, thrust::plus<f_t>());
1205-
1206-
avg = sum / vec.size();
1191+
auto min_nonzero = [] __host__ __device__(f_t x)
1192+
-> f_t { return x == 0 ? std::numeric_limits<f_t>::max() : abs(x); };
1193+
1194+
cuopt_assert(vec.size() > 0, "Vector must not be empty");
1195+
1196+
auto stream = vec.stream();
1197+
size_t n = vec.size();
1198+
1199+
rmm::device_scalar<f_t> d_smallest(stream);
1200+
rmm::device_scalar<f_t> d_largest(stream);
1201+
rmm::device_scalar<f_t> d_sum(stream);
1202+
1203+
auto min_nz_iter = thrust::make_transform_iterator(vec.cbegin(), min_nonzero);
1204+
auto abs_iter = thrust::make_transform_iterator(vec.cbegin(), abs_op);
1205+
1206+
void* d_temp = nullptr;
1207+
size_t bytes_1 = 0, bytes_2 = 0, bytes_3 = 0;
1208+
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(d_temp,
1209+
bytes_1,
1210+
min_nz_iter,
1211+
d_smallest.data(),
1212+
n,
1213+
cuda::minimum<>{},
1214+
std::numeric_limits<f_t>::max(),
1215+
stream));
1216+
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(
1217+
d_temp, bytes_2, abs_iter, d_largest.data(), n, cuda::maximum<>{}, f_t(0), stream));
1218+
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(
1219+
d_temp, bytes_3, abs_iter, d_sum.data(), n, cuda::std::plus<>{}, f_t(0), stream));
1220+
1221+
size_t max_bytes = std::max({bytes_1, bytes_2, bytes_3});
1222+
rmm::device_buffer temp_buf(max_bytes, stream);
1223+
1224+
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(temp_buf.data(),
1225+
bytes_1,
1226+
min_nz_iter,
1227+
d_smallest.data(),
1228+
n,
1229+
cuda::minimum<>{},
1230+
std::numeric_limits<f_t>::max(),
1231+
stream));
1232+
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(
1233+
temp_buf.data(), bytes_2, abs_iter, d_largest.data(), n, cuda::maximum<>{}, f_t(0), stream));
1234+
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(
1235+
temp_buf.data(), bytes_3, abs_iter, d_sum.data(), n, cuda::std::plus<>{}, f_t(0), stream));
1236+
1237+
smallest = d_smallest.value(stream);
1238+
largest = d_largest.value(stream);
1239+
avg = d_sum.value(stream) / vec.size();
12071240
};
12081241

12091242
template <typename f_t>
@@ -1406,11 +1439,25 @@ HDI void fixed_error_computation(const f_t norm_squared_delta_primal,
14061439
const f_t interaction,
14071440
f_t* fixed_point_error)
14081441
{
1442+
cuopt_assert(!isnan(norm_squared_delta_primal), "norm_squared_delta_primal must not be NaN");
1443+
cuopt_assert(!isnan(norm_squared_delta_dual), "norm_squared_delta_dual must not be NaN");
1444+
cuopt_assert(!isnan(primal_weight), "primal_weight must not be NaN");
1445+
cuopt_assert(!isnan(step_size), "step_size must not be NaN");
1446+
cuopt_assert(!isnan(interaction), "interaction must not be NaN");
1447+
cuopt_assert(norm_squared_delta_primal >= f_t(0.0), "norm_squared_delta_primal must be >= 0");
1448+
cuopt_assert(norm_squared_delta_dual >= f_t(0.0), "norm_squared_delta_dual must be >= 0");
1449+
cuopt_assert(primal_weight > f_t(0.0), "primal_weight must be > 0");
1450+
cuopt_assert(step_size > f_t(0.0), "step_size must be > 0");
1451+
14091452
const f_t movement =
14101453
norm_squared_delta_primal * primal_weight + norm_squared_delta_dual / primal_weight;
14111454
const f_t computed_interaction = f_t(2.0) * interaction * step_size;
14121455

1413-
*fixed_point_error = cuda::std::sqrt(movement + computed_interaction);
1456+
cuopt_assert(movement + computed_interaction >= f_t(0.0),
1457+
"Movement + computed interaction must be >= 0");
1458+
1459+
// Clamp to 0 to avoid NaN
1460+
*fixed_point_error = cuda::std::sqrt(cuda::std::max(f_t(0.0), movement + computed_interaction));
14141461

14151462
#ifdef CUPDLP_DEBUG_MODE
14161463
printf("movement %lf\n", movement);
@@ -1790,6 +1837,7 @@ void pdlp_solver_t<i_t, f_t>::compute_fixed_error(std::vector<int>& has_restarte
17901837
// Sync to make sure all previous cuSparse operations are finished before setting the
17911838
// potential_next_dual_solution
17921839
RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_));
1840+
17931841
// Make potential_next_dual_solution point towards reflected dual solution to reuse the code
17941842
RAFT_CUSPARSE_TRY(cusparseDnVecSetValues(cusparse_view.potential_next_dual_solution,
17951843
(void*)pdhg_solver_.get_reflected_dual().data()));
@@ -1813,6 +1861,7 @@ void pdlp_solver_t<i_t, f_t>::compute_fixed_error(std::vector<int>& has_restarte
18131861
RAFT_CUDA_TRY(cudaStreamSynchronize(
18141862
stream_view_)); // To make sure all the data is written from device to host
18151863
RAFT_CUDA_TRY(cudaPeekAtLastError());
1864+
18161865
#ifdef CUPDLP_DEBUG_MODE
18171866
RAFT_CUDA_TRY(cudaDeviceSynchronize());
18181867
#endif
@@ -1847,9 +1896,15 @@ void pdlp_solver_t<i_t, f_t>::compute_fixed_error(std::vector<int>& has_restarte
18471896
#endif
18481897

18491898
for (size_t i = 0; i < climber_strategies_.size(); ++i) {
1899+
cuopt_assert(!std::isnan(restart_strategy_.fixed_point_error_[i]),
1900+
"fixed_point_error_ must not be NaN after compute_fixed_error");
1901+
cuopt_assert(restart_strategy_.fixed_point_error_[i] >= f_t(0.0),
1902+
"fixed_point_error_ must be >= 0 after compute_fixed_error");
18501903
if (has_restarted[i]) {
18511904
restart_strategy_.initial_fixed_point_error_[i] = restart_strategy_.fixed_point_error_[i];
1852-
has_restarted[i] = false;
1905+
cuopt_assert(!std::isnan(restart_strategy_.initial_fixed_point_error_[i]),
1906+
"initial_fixed_point_error_ must not be NaN after assignment");
1907+
has_restarted[i] = false;
18531908
}
18541909
}
18551910
}
@@ -1869,6 +1924,7 @@ void pdlp_solver_t<i_t, f_t>::transpose_primal_dual_to_row(
18691924
rmm::device_uvector<f_t> dual_slack_transposed(
18701925
is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_);
18711926

1927+
RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_));
18721928
CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(),
18731929
CUBLAS_OP_T,
18741930
CUBLAS_OP_N,
@@ -1945,6 +2001,7 @@ void pdlp_solver_t<i_t, f_t>::transpose_primal_dual_back_to_col(
19452001
rmm::device_uvector<f_t> dual_slack_transposed(
19462002
is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_);
19472003

2004+
RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_));
19482005
CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(),
19492006
CUBLAS_OP_T,
19502007
CUBLAS_OP_N,
@@ -2632,7 +2689,7 @@ void pdlp_solver_t<i_t, f_t>::compute_initial_step_size()
26322689
rmm::device_uvector<f_t> d_atq(n, stream_view_);
26332690

26342691
std::mt19937 gen(1);
2635-
std::normal_distribution<double> dist(0.0, 1.0);
2692+
std::normal_distribution<f_t> dist(f_t(0.0), f_t(1.0));
26362693

26372694
for (int i = 0; i < m; ++i)
26382695
z[i] = dist(gen);
@@ -2684,7 +2741,7 @@ void pdlp_solver_t<i_t, f_t>::compute_initial_step_size()
26842741
vecATQ,
26852742
CUSPARSE_SPMV_CSR_ALG2,
26862743
(f_t*)cusparse_view_.buffer_transpose.data(),
2687-
stream_view_));
2744+
stream_view_.value()));
26882745

26892746
// z = A @ A_t_q
26902747
RAFT_CUSPARSE_TRY(
@@ -2697,7 +2754,7 @@ void pdlp_solver_t<i_t, f_t>::compute_initial_step_size()
26972754
vecZ,
26982755
CUSPARSE_SPMV_CSR_ALG2,
26992756
(f_t*)cusparse_view_.buffer_non_transpose.data(),
2700-
stream_view_));
2757+
stream_view_.value()));
27012758
// sigma_max_sq = dot(q, z)
27022759
RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(handle_ptr_->get_cublas_handle(),
27032760
m,
@@ -2706,7 +2763,7 @@ void pdlp_solver_t<i_t, f_t>::compute_initial_step_size()
27062763
d_z.data(),
27072764
primal_stride,
27082765
sigma_max_sq.data(),
2709-
stream_view_));
2766+
stream_view_.value()));
27102767

27112768
cub::DeviceTransform::Transform(
27122769
cuda::std::make_tuple(d_q.data(), d_z.data()),

cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@
3434
#include <thrust/for_each.h>
3535
#include <thrust/functional.h>
3636
#include <thrust/iterator/counting_iterator.h>
37+
#include <thrust/iterator/reverse_iterator.h>
3738
#include <thrust/iterator/transform_iterator.h>
3839
#include <thrust/iterator/zip_iterator.h>
3940
#include <thrust/logical.h>
4041
#include <thrust/sort.h>
41-
#include <thrust/transform_reduce.h>
4242

4343
#include <cub/cub.cuh>
4444

0 commit comments

Comments
 (0)