-
Notifications
You must be signed in to change notification settings - Fork 171
Add support for handling free variables in QPs with the augmented system #1146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -93,6 +93,7 @@ class iteration_data_t { | |
| public: | ||
| iteration_data_t(const lp_problem_t<i_t, f_t>& lp, | ||
| i_t num_upper_bounds, | ||
| const std::vector<i_t>& free_variable_indices, | ||
| const csc_matrix_t<i_t, f_t>& Qin, | ||
| const simplex_solver_settings_t<i_t, f_t>& settings) | ||
| : upper_bounds(num_upper_bounds), | ||
|
|
@@ -165,6 +166,8 @@ class iteration_data_t { | |
| d_augmented_diagonal_indices_(0, lp.handle_ptr->get_stream()), | ||
| use_augmented(false), | ||
| has_factorization(false), | ||
| n_free_vars(0), | ||
| d_is_free_(0, lp.handle_ptr->get_stream()), | ||
| num_factorizations(0), | ||
| has_solve_info(false), | ||
| settings_(settings), | ||
|
|
@@ -220,6 +223,18 @@ class iteration_data_t { | |
| { | ||
| raft::common::nvtx::range fun_scope("Barrier: LP Data Creation"); | ||
|
|
||
| // Set up free variable tracking for QPs | ||
| if (!free_variable_indices.empty()) { | ||
| n_free_vars = free_variable_indices.size(); | ||
| std::vector<i_t> is_free_host(lp.num_cols, 0); | ||
| for (i_t j : free_variable_indices) { | ||
| is_free_host[j] = 1; | ||
| } | ||
| d_is_free_.resize(lp.num_cols, stream_view_); | ||
| raft::copy(d_is_free_.data(), is_free_host.data(), lp.num_cols, stream_view_); | ||
| settings.log.printf("Free variables (QP) : %d\n", n_free_vars); | ||
| } | ||
|
|
||
| bool has_Q = Q.x.size() > 0; | ||
| indefinite_Q = false; | ||
| if (has_Q) { | ||
|
|
@@ -1538,6 +1553,8 @@ class iteration_data_t { | |
|
|
||
| bool use_augmented; | ||
| i_t symbolic_status; | ||
| i_t n_free_vars{0}; | ||
| rmm::device_uvector<i_t> d_is_free_; // 1 if variable is free (QP only), 0 otherwise | ||
|
|
||
| std::unique_ptr<sparse_cholesky_base_t<i_t, f_t>> chol; | ||
|
|
||
|
|
@@ -1908,6 +1925,12 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data) | |
| } | ||
| } | ||
| } | ||
| // Free variables have z = 0 (no complementarity condition) | ||
| if (data.n_free_vars > 0) { | ||
| for (i_t j : presolve_info.free_variable_indices) { | ||
| data.z[j] = 0.0; | ||
| } | ||
| } | ||
| } else if (use_augmented) { | ||
| dense_vector_t<i_t, f_t> dual_rhs(lp.num_cols + lp.num_rows); | ||
| dual_rhs.set_scalar(0.0); | ||
|
|
@@ -1970,8 +1993,23 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data) | |
| vector_norm2<i_t, f_t>(data.dual_residual)); | ||
| #endif | ||
| // Make sure (w, x, v, z) > 0 | ||
| if (data.n_free_vars > 0) { | ||
| std::vector<i_t> nonnegative_variables(data.w.size(), 1); | ||
| for (i_t j : presolve_info.free_variable_indices) { | ||
| nonnegative_variables[j] = 0; | ||
| } | ||
|
|
||
| data.x.ensure_positive(epsilon_adjust, nonnegative_variables); | ||
|
|
||
| for (i_t j : presolve_info.free_variable_indices) { | ||
| data.z[j] = 0.0; | ||
| } | ||
|
|
||
| } else { | ||
| data.x.ensure_positive(epsilon_adjust); | ||
| } | ||
| data.w.ensure_positive(epsilon_adjust); | ||
| data.x.ensure_positive(epsilon_adjust); | ||
|
|
||
| #ifdef PRINT_INFO | ||
| settings.log.printf("min v %e min z %e\n", data.v.minimum(), data.z.minimum()); | ||
| #endif | ||
|
|
@@ -2166,6 +2204,29 @@ f_t barrier_solver_t<i_t, f_t>::gpu_max_step_to_boundary(iteration_data_t<i_t, f | |
| const rmm::device_uvector<f_t>& x, | ||
| const rmm::device_uvector<f_t>& dx) | ||
| { | ||
| // For x-sized vectors with free variables, skip free vars in ratio test | ||
| const bool has_free = data.n_free_vars > 0 && static_cast<i_t>(x.size()) == lp.num_cols; | ||
|
|
||
| if (has_free) { | ||
| auto is_free_ptr = data.d_is_free_.data(); | ||
| auto ratio_test_free = [is_free_ptr] HD(const thrust::tuple<f_t, f_t, i_t> t) { | ||
| const f_t dx_val = thrust::get<0>(t); | ||
| const f_t x_val = thrust::get<1>(t); | ||
| const i_t is_free = thrust::get<2>(t); | ||
| if (is_free) return f_t(1.0); | ||
| if (dx_val < f_t(0.0)) return -x_val / dx_val; | ||
| return f_t(1.0); | ||
| }; | ||
|
|
||
| return data.transform_reduce_helper_.transform_reduce( | ||
| thrust::make_zip_iterator(dx.data(), x.data(), is_free_ptr), | ||
| thrust::minimum<f_t>(), | ||
| ratio_test_free, | ||
| f_t(1.0), | ||
| x.size(), | ||
| stream_view_); | ||
| } | ||
|
|
||
| return data.transform_reduce_helper_.transform_reduce( | ||
| thrust::make_zip_iterator(dx.data(), x.data()), | ||
| thrust::minimum<f_t>(), | ||
|
|
@@ -2248,11 +2309,37 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_ | |
| raft::common::nvtx::range fun_scope("Barrier: GPU diag, inv diag and sqrt inv diag formation"); | ||
|
|
||
| // diag = z ./ x | ||
| cub::DeviceTransform::Transform(cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data()), | ||
| data.d_diag_.data(), | ||
| data.d_diag_.size(), | ||
| cuda::std::divides<>{}, | ||
| stream_view_.value()); | ||
| // For native free variables (QP): use Q diagonal if available, otherwise a static regularizer | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this only for the ADA^T case? |
||
| if (data.n_free_vars > 0) { | ||
| constexpr f_t free_var_reg = 1e-7; | ||
| if (data.Q.n > 0 && data.Q_diagonal) { | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple( | ||
| data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data(), data.d_Q_diag_.data()), | ||
| data.d_diag_.data(), | ||
| data.d_diag_.size(), | ||
| [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free, f_t q_jj) { | ||
| if (!is_free) return z_j / x_j; | ||
| return (q_jj > f_t(0)) ? f_t(0) : free_var_reg; | ||
| }, | ||
| stream_view_.value()); | ||
| } else { | ||
| cub::DeviceTransform::Transform( | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm confused about this case: it is for data.Q.n == 0 || !data.Q.diagonal |
||
| cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data()), | ||
| data.d_diag_.data(), | ||
| data.d_diag_.size(), | ||
| [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free) { | ||
| return is_free ? free_var_reg : (z_j / x_j); | ||
| }, | ||
| stream_view_.value()); | ||
| } | ||
| } else { | ||
| cub::DeviceTransform::Transform(cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data()), | ||
| data.d_diag_.data(), | ||
| data.d_diag_.size(), | ||
| cuda::std::divides<>{}, | ||
| stream_view_.value()); | ||
| } | ||
| RAFT_CHECK_CUDA(stream_view_); | ||
| // diag = z ./ x + E * (v ./ w) * E' | ||
|
|
||
|
|
@@ -2358,20 +2445,40 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_ | |
| RAFT_CHECK_CUDA(stream_view_); | ||
| // tmp3 <- tmp3 .+ -(complementarity_xz_rhs ./ x) .+ dual_rhs | ||
| // tmp4 <- inv_diag .* tmp3 | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(data.d_inv_diag.data(), | ||
| data.d_tmp3_.data(), | ||
| data.d_complementarity_xz_rhs_.data(), | ||
| data.d_x_.data(), | ||
| data.d_dual_rhs_.data()), | ||
| thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()), | ||
| lp.num_cols, | ||
| [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs) | ||
| -> thrust::tuple<f_t, f_t> { | ||
| const f_t tmp = tmp3 + -(complementarity_xz_rhs / x) + dual_rhs; | ||
| return {tmp, inv_diag * tmp}; | ||
| }, | ||
| stream_view_.value()); | ||
| // For free variables, skip the complementarity_xz_rhs / x term | ||
| if (data.n_free_vars > 0) { | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(data.d_inv_diag.data(), | ||
| data.d_tmp3_.data(), | ||
| data.d_complementarity_xz_rhs_.data(), | ||
| data.d_x_.data(), | ||
| data.d_dual_rhs_.data(), | ||
| data.d_is_free_.data()), | ||
| thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()), | ||
| lp.num_cols, | ||
| [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs, i_t is_free) | ||
| -> thrust::tuple<f_t, f_t> { | ||
| const f_t xz_term = is_free ? f_t(0) : (complementarity_xz_rhs / x); | ||
| const f_t tmp = tmp3 - xz_term + dual_rhs; | ||
| return {tmp, inv_diag * tmp}; | ||
| }, | ||
| stream_view_.value()); | ||
| } else { | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(data.d_inv_diag.data(), | ||
| data.d_tmp3_.data(), | ||
| data.d_complementarity_xz_rhs_.data(), | ||
| data.d_x_.data(), | ||
| data.d_dual_rhs_.data()), | ||
| thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()), | ||
| lp.num_cols, | ||
| [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs) | ||
| -> thrust::tuple<f_t, f_t> { | ||
| const f_t tmp = tmp3 + -(complementarity_xz_rhs / x) + dual_rhs; | ||
| return {tmp, inv_diag * tmp}; | ||
| }, | ||
| stream_view_.value()); | ||
| } | ||
| RAFT_CHECK_CUDA(stream_view_); | ||
| raft::copy(data.d_r1_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_); | ||
| raft::copy(data.d_r1_prime_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_); | ||
|
|
@@ -2656,17 +2763,33 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_ | |
| raft::common::nvtx::range fun_scope("Barrier: dz formation GPU"); | ||
|
|
||
| // dz = (complementarity_xz_rhs - z.* dx) ./ x; | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(), | ||
| data.d_z_.data(), | ||
| data.d_dx_.data(), | ||
| data.d_x_.data()), | ||
| data.d_dz_.data(), | ||
| data.d_dz_.size(), | ||
| [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x) { | ||
| return (complementarity_xz_rhs - z * dx) / x; | ||
| }, | ||
| stream_view_.value()); | ||
| // For free variables, dz = 0 | ||
| if (data.n_free_vars > 0) { | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(), | ||
| data.d_z_.data(), | ||
| data.d_dx_.data(), | ||
| data.d_x_.data(), | ||
| data.d_is_free_.data()), | ||
| data.d_dz_.data(), | ||
| data.d_dz_.size(), | ||
| [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x, i_t is_free) { | ||
| return is_free ? f_t(0) : ((complementarity_xz_rhs - z * dx) / x); | ||
| }, | ||
| stream_view_.value()); | ||
| } else { | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(), | ||
| data.d_z_.data(), | ||
| data.d_dx_.data(), | ||
| data.d_x_.data()), | ||
| data.d_dz_.data(), | ||
| data.d_dz_.size(), | ||
| [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x) { | ||
| return (complementarity_xz_rhs - z * dx) / x; | ||
| }, | ||
| stream_view_.value()); | ||
| } | ||
| RAFT_CHECK_CUDA(stream_view_); | ||
| raft::copy(dz.data(), data.d_dz_.data(), data.d_dz_.size(), stream_view_); | ||
| } | ||
|
|
@@ -2956,24 +3079,53 @@ void barrier_solver_t<i_t, f_t>::compute_target_mu( | |
| stream_view_); | ||
|
|
||
| complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum; | ||
|
|
||
| mu_aff = (complementarity_aff_sum) / | ||
| (static_cast<f_t>(data.x.size()) + static_cast<f_t>(data.n_upper_bounds)); | ||
| f_t mu_denom = static_cast<f_t>(data.x.size()) + static_cast<f_t>(data.n_upper_bounds); | ||
| mu_denom -= static_cast<f_t>(data.n_free_vars); | ||
| mu_aff = complementarity_aff_sum / mu_denom; | ||
|
Comment on lines
+3082
to
+3084
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Guard the zero-complementarity case before dividing by With native free variables, an equality-constrained QP can legitimately have Possible fix+ auto complementarity_pairs = [&]() -> f_t {
+ return static_cast<f_t>(data.x.size()) +
+ static_cast<f_t>(data.n_upper_bounds) -
+ static_cast<f_t>(data.n_free_vars);
+ };
+
- f_t mu_denom = static_cast<f_t>(data.x.size()) + static_cast<f_t>(data.n_upper_bounds);
- mu_denom -= static_cast<f_t>(data.n_free_vars);
- mu_aff = complementarity_aff_sum / mu_denom;
+ const f_t mu_denom = complementarity_pairs();
+ if (mu_denom == f_t(0)) {
+ mu_aff = f_t(0);
+ sigma = f_t(0);
+ new_mu = f_t(0);
+ return;
+ }
+ mu_aff = complementarity_aff_sum / mu_denom;Apply the same helper/guard in As per coding guidelines, "Check numerical stability: prevent overflow/underflow, precision loss, division by zero/near-zero, and use epsilon comparisons for floating-point equality checks." Also applies to: 3303-3312 🤖 Prompt for AI Agents |
||
| sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); | ||
| new_mu = sigma * mu_aff; | ||
| } | ||
|
|
||
| template <typename i_t, typename f_t> | ||
| static void fill_linear_cc_rhs(iteration_data_t<i_t, f_t>& data, | ||
| f_t new_mu, | ||
| raft::device_span<f_t> out, | ||
| raft::device_span<f_t> dx_aff, | ||
| raft::device_span<f_t> dz_aff, | ||
| rmm::cuda_stream_view stream_view) | ||
| { | ||
| if (out.empty()) return; | ||
| if (data.n_free_vars > 0) { | ||
| auto is_free_ptr = data.d_is_free_.data(); | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), is_free_ptr), | ||
| out.data(), | ||
| out.size(), | ||
| [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_free) { | ||
| return is_free ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu); | ||
| }, | ||
| stream_view.value()); | ||
| } else { | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(dx_aff.data(), dz_aff.data()), | ||
| out.data(), | ||
| out.size(), | ||
| [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val) { return -(dx_aff_val * dz_aff_val) + new_mu; }, | ||
| stream_view.value()); | ||
| } | ||
| } | ||
|
|
||
| template <typename i_t, typename f_t> | ||
| void barrier_solver_t<i_t, f_t>::compute_cc_rhs(iteration_data_t<i_t, f_t>& data, f_t& new_mu) | ||
| { | ||
| raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs"); | ||
|
|
||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(data.d_dx_aff_.data(), data.d_dz_aff_.data()), | ||
| data.d_complementarity_xz_rhs_.data(), | ||
| data.d_complementarity_xz_rhs_.size(), | ||
| [new_mu] HD(f_t dx_aff, f_t dz_aff) { return -(dx_aff * dz_aff) + new_mu; }, | ||
| stream_view_.value()); | ||
| fill_linear_cc_rhs(data, | ||
| new_mu, | ||
| cuopt::make_span(data.d_complementarity_xz_rhs_), | ||
| cuopt::make_span(data.d_dx_aff_), | ||
| cuopt::make_span(data.d_dz_aff_), | ||
| stream_view_); | ||
| RAFT_CHECK_CUDA(stream_view_); | ||
| cub::DeviceTransform::Transform( | ||
| cuda::std::make_tuple(data.d_dw_aff_.data(), data.d_dv_aff_.data()), | ||
|
|
@@ -3160,13 +3312,16 @@ void barrier_solver_t<i_t, f_t>::compute_mu(iteration_data_t<i_t, f_t>& data, f_ | |
| { | ||
| raft::common::nvtx::range fun_scope("Barrier: compute_mu"); | ||
|
|
||
| f_t mu_denom = static_cast<f_t>(data.x.size()) + static_cast<f_t>(data.n_upper_bounds); | ||
| mu_denom -= static_cast<f_t>(data.n_free_vars); // free vars don't contribute to mu | ||
|
|
||
| mu = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(), | ||
| data.d_complementarity_xz_residual_.size(), | ||
| stream_view_) + | ||
| data.sum_reduce_helper_.sum(data.d_complementarity_wv_residual_.begin(), | ||
| data.d_complementarity_wv_residual_.size(), | ||
| stream_view_)) / | ||
| (static_cast<f_t>(data.x.size()) + static_cast<f_t>(data.n_upper_bounds)); | ||
| mu_denom; | ||
| } | ||
|
|
||
| template <typename i_t, typename f_t> | ||
|
|
@@ -3444,7 +3599,8 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time, | |
| csc_matrix_t<i_t, f_t> Q(lp.num_cols, 0, 0); | ||
| if (lp.Q.n > 0) { create_Q(lp, Q); } | ||
|
|
||
| iteration_data_t<i_t, f_t> data(lp, num_upper_bounds, Q, settings); | ||
| iteration_data_t<i_t, f_t> data( | ||
| lp, num_upper_bounds, presolve_info.free_variable_indices, Q, settings); | ||
| if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { | ||
| settings.log.printf("Barrier solver halted\n"); | ||
| return lp_status_t::CONCURRENT_LIMIT; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1139,7 +1139,17 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original, | |
|
|
||
| problem.Q.check_matrix("Before free variable expansion"); | ||
|
|
||
| if (settings.barrier_presolve && free_variables > 0) { | ||
| // For QPs, keep free variables as-is rather than | ||
| // splitting x = v - w. The barrier solver handles them natively with a | ||
| // static regularizer on the diagonal instead of z/x complementarity terms. | ||
| if (settings.barrier_presolve && free_variables > 0 && problem.Q.n > 0) { | ||
| presolve_info.free_variable_indices.clear(); | ||
| for (i_t j = 0; j < problem.num_cols; j++) { | ||
| if (problem.lower[j] == -inf && problem.upper[j] == inf) { | ||
| presolve_info.free_variable_indices.push_back(j); | ||
| } | ||
| } | ||
| } else if (settings.barrier_presolve && free_variables > 0) { | ||
|
Comment on lines
+1145
to
+1152
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clear the split-pair metadata in the native-QP branch. This path switches free-variable representation from Suggested fix if (settings.barrier_presolve && free_variables > 0 && problem.Q.n > 0) {
presolve_info.free_variable_indices.clear();
+ presolve_info.free_variable_pairs.clear();
for (i_t j = 0; j < problem.num_cols; j++) {
if (problem.lower[j] == -inf && problem.upper[j] == inf) {
presolve_info.free_variable_indices.push_back(j);
}
}🤖 Prompt for AI Agents |
||
| // We have a variable x_j: with -inf < x_j < inf | ||
| // we create new variables v and w with 0 <= v, w and x_j = v - w | ||
| // Constraints | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Build the positivity mask at
x.size(), notw.size().data.x.ensure_positive(..., mask)walksx.size()entries, but this mask is allocated withdata.w.size()(n_upper_bounds). On any QP where a free column index is outside the upper-bound list, this writes/reads past the mask and can corrupt the initial point or crash before the first iteration.Suggested fix
As per coding guidelines, "Flag invalid memory access patterns including out-of-bounds, use-after-free, and host/device confusion."
🤖 Prompt for AI Agents