diff --git a/cpp/src/barrier/barrier.cu b/cpp/src/barrier/barrier.cu index 778038db1f..ba2fbc1e10 100644 --- a/cpp/src/barrier/barrier.cu +++ b/cpp/src/barrier/barrier.cu @@ -93,6 +93,7 @@ class iteration_data_t { public: iteration_data_t(const lp_problem_t& lp, i_t num_upper_bounds, + const std::vector& free_variable_indices, const csc_matrix_t& Qin, const simplex_solver_settings_t& settings) : upper_bounds(num_upper_bounds), @@ -165,6 +166,8 @@ class iteration_data_t { d_augmented_diagonal_indices_(0, lp.handle_ptr->get_stream()), use_augmented(false), has_factorization(false), + n_free_vars(0), + d_is_free_(0, lp.handle_ptr->get_stream()), num_factorizations(0), has_solve_info(false), settings_(settings), @@ -220,6 +223,18 @@ class iteration_data_t { { raft::common::nvtx::range fun_scope("Barrier: LP Data Creation"); + // Set up free variable tracking for QPs + if (!free_variable_indices.empty()) { + n_free_vars = free_variable_indices.size(); + std::vector is_free_host(lp.num_cols, 0); + for (i_t j : free_variable_indices) { + is_free_host[j] = 1; + } + d_is_free_.resize(lp.num_cols, stream_view_); + raft::copy(d_is_free_.data(), is_free_host.data(), lp.num_cols, stream_view_); + settings.log.printf("Free variables (QP) : %d\n", n_free_vars); + } + bool has_Q = Q.x.size() > 0; indefinite_Q = false; if (has_Q) { @@ -1538,6 +1553,8 @@ class iteration_data_t { bool use_augmented; i_t symbolic_status; + i_t n_free_vars{0}; + rmm::device_uvector d_is_free_; // 1 if variable is free (QP only), 0 otherwise std::unique_ptr> chol; @@ -1908,6 +1925,12 @@ int barrier_solver_t::initial_point(iteration_data_t& data) } } } + // Free variables have z = 0 (no complementarity condition) + if (data.n_free_vars > 0) { + for (i_t j : presolve_info.free_variable_indices) { + data.z[j] = 0.0; + } + } } else if (use_augmented) { dense_vector_t dual_rhs(lp.num_cols + lp.num_rows); dual_rhs.set_scalar(0.0); @@ -1970,8 +1993,23 @@ int barrier_solver_t::initial_point(iteration_data_t& data) vector_norm2(data.dual_residual)); #endif // Make sure (w, x, v, z) > 0 + if (data.n_free_vars > 0) { + std::vector nonnegative_variables(data.w.size(), 1); + for (i_t j : presolve_info.free_variable_indices) { + nonnegative_variables[j] = 0; + } + + data.x.ensure_positive(epsilon_adjust, nonnegative_variables); + + for (i_t j : presolve_info.free_variable_indices) { + data.z[j] = 0.0; + } + + } else { + data.x.ensure_positive(epsilon_adjust); + } data.w.ensure_positive(epsilon_adjust); - data.x.ensure_positive(epsilon_adjust); + #ifdef PRINT_INFO settings.log.printf("min v %e min z %e\n", data.v.minimum(), data.z.minimum()); #endif @@ -2166,6 +2204,29 @@ f_t barrier_solver_t::gpu_max_step_to_boundary(iteration_data_t& x, const rmm::device_uvector& dx) { + // For x-sized vectors with free variables, skip free vars in ratio test + const bool has_free = data.n_free_vars > 0 && static_cast(x.size()) == lp.num_cols; + + if (has_free) { + auto is_free_ptr = data.d_is_free_.data(); + auto ratio_test_free = [is_free_ptr] HD(const thrust::tuple t) { + const f_t dx_val = thrust::get<0>(t); + const f_t x_val = thrust::get<1>(t); + const i_t is_free = thrust::get<2>(t); + if (is_free) return f_t(1.0); + if (dx_val < f_t(0.0)) return -x_val / dx_val; + return f_t(1.0); + }; + + return data.transform_reduce_helper_.transform_reduce( + thrust::make_zip_iterator(dx.data(), x.data(), is_free_ptr), + thrust::minimum(), + ratio_test_free, + f_t(1.0), + x.size(), + stream_view_); + } + return data.transform_reduce_helper_.transform_reduce( thrust::make_zip_iterator(dx.data(), x.data()), thrust::minimum(), @@ -2248,11 +2309,37 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t{}, - stream_view_.value()); + // For native free variables (QP): use Q diagonal if available, otherwise a static regularizer + if (data.n_free_vars > 0) { + constexpr f_t free_var_reg = 1e-7; + if (data.Q.n > 0 && data.Q_diagonal) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple( + data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data(), data.d_Q_diag_.data()), + data.d_diag_.data(), + data.d_diag_.size(), + [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free, f_t q_jj) { + if (!is_free) return z_j / x_j; + return (q_jj > f_t(0)) ? f_t(0) : free_var_reg; + }, + stream_view_.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data(), data.d_is_free_.data()), + data.d_diag_.data(), + data.d_diag_.size(), + [free_var_reg] HD(f_t z_j, f_t x_j, i_t is_free) { + return is_free ? free_var_reg : (z_j / x_j); + }, + stream_view_.value()); + } + } else { + cub::DeviceTransform::Transform(cuda::std::make_tuple(data.d_z_.data(), data.d_x_.data()), + data.d_diag_.data(), + data.d_diag_.size(), + cuda::std::divides<>{}, + stream_view_.value()); + } RAFT_CHECK_CUDA(stream_view_); // diag = z ./ x + E * (v ./ w) * E' @@ -2358,20 +2445,40 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t thrust::tuple { - const f_t tmp = tmp3 + -(complementarity_xz_rhs / x) + dual_rhs; - return {tmp, inv_diag * tmp}; - }, - stream_view_.value()); + // For free variables, skip the complementarity_xz_rhs / x term + if (data.n_free_vars > 0) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_inv_diag.data(), + data.d_tmp3_.data(), + data.d_complementarity_xz_rhs_.data(), + data.d_x_.data(), + data.d_dual_rhs_.data(), + data.d_is_free_.data()), + thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()), + lp.num_cols, + [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs, i_t is_free) + -> thrust::tuple { + const f_t xz_term = is_free ? f_t(0) : (complementarity_xz_rhs / x); + const f_t tmp = tmp3 - xz_term + dual_rhs; + return {tmp, inv_diag * tmp}; + }, + stream_view_.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_inv_diag.data(), + data.d_tmp3_.data(), + data.d_complementarity_xz_rhs_.data(), + data.d_x_.data(), + data.d_dual_rhs_.data()), + thrust::make_zip_iterator(data.d_tmp3_.data(), data.d_tmp4_.data()), + lp.num_cols, + [] HD(f_t inv_diag, f_t tmp3, f_t complementarity_xz_rhs, f_t x, f_t dual_rhs) + -> thrust::tuple { + const f_t tmp = tmp3 + -(complementarity_xz_rhs / x) + dual_rhs; + return {tmp, inv_diag * tmp}; + }, + stream_view_.value()); + } RAFT_CHECK_CUDA(stream_view_); raft::copy(data.d_r1_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_); raft::copy(data.d_r1_prime_.data(), data.d_tmp3_.data(), data.d_tmp3_.size(), stream_view_); @@ -2656,17 +2763,33 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t 0) { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(), + data.d_z_.data(), + data.d_dx_.data(), + data.d_x_.data(), + data.d_is_free_.data()), + data.d_dz_.data(), + data.d_dz_.size(), + [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x, i_t is_free) { + return is_free ? f_t(0) : ((complementarity_xz_rhs - z * dx) / x); + }, + stream_view_.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_complementarity_xz_rhs_.data(), + data.d_z_.data(), + data.d_dx_.data(), + data.d_x_.data()), + data.d_dz_.data(), + data.d_dz_.size(), + [] HD(f_t complementarity_xz_rhs, f_t z, f_t dx, f_t x) { + return (complementarity_xz_rhs - z * dx) / x; + }, + stream_view_.value()); + } RAFT_CHECK_CUDA(stream_view_); raft::copy(dz.data(), data.d_dz_.data(), data.d_dz_.size(), stream_view_); } @@ -2956,24 +3079,53 @@ void barrier_solver_t::compute_target_mu( stream_view_); complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum; - - mu_aff = (complementarity_aff_sum) / - (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); + f_t mu_denom = static_cast(data.x.size()) + static_cast(data.n_upper_bounds); + mu_denom -= static_cast(data.n_free_vars); + mu_aff = complementarity_aff_sum / mu_denom; sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); new_mu = sigma * mu_aff; } +template +static void fill_linear_cc_rhs(iteration_data_t& data, + f_t new_mu, + raft::device_span out, + raft::device_span dx_aff, + raft::device_span dz_aff, + rmm::cuda_stream_view stream_view) +{ + if (out.empty()) return; + if (data.n_free_vars > 0) { + auto is_free_ptr = data.d_is_free_.data(); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(dx_aff.data(), dz_aff.data(), is_free_ptr), + out.data(), + out.size(), + [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val, i_t is_free) { + return is_free ? f_t(0) : (-(dx_aff_val * dz_aff_val) + new_mu); + }, + stream_view.value()); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(dx_aff.data(), dz_aff.data()), + out.data(), + out.size(), + [new_mu] HD(f_t dx_aff_val, f_t dz_aff_val) { return -(dx_aff_val * dz_aff_val) + new_mu; }, + stream_view.value()); + } +} + template void barrier_solver_t::compute_cc_rhs(iteration_data_t& data, f_t& new_mu) { raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs"); - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_dx_aff_.data(), data.d_dz_aff_.data()), - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.size(), - [new_mu] HD(f_t dx_aff, f_t dz_aff) { return -(dx_aff * dz_aff) + new_mu; }, - stream_view_.value()); + fill_linear_cc_rhs(data, + new_mu, + cuopt::make_span(data.d_complementarity_xz_rhs_), + cuopt::make_span(data.d_dx_aff_), + cuopt::make_span(data.d_dz_aff_), + stream_view_); RAFT_CHECK_CUDA(stream_view_); cub::DeviceTransform::Transform( cuda::std::make_tuple(data.d_dw_aff_.data(), data.d_dv_aff_.data()), @@ -3160,13 +3312,16 @@ void barrier_solver_t::compute_mu(iteration_data_t& data, f_ { raft::common::nvtx::range fun_scope("Barrier: compute_mu"); + f_t mu_denom = static_cast(data.x.size()) + static_cast(data.n_upper_bounds); + mu_denom -= static_cast(data.n_free_vars); // free vars don't contribute to mu + mu = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(), data.d_complementarity_xz_residual_.size(), stream_view_) + data.sum_reduce_helper_.sum(data.d_complementarity_wv_residual_.begin(), data.d_complementarity_wv_residual_.size(), stream_view_)) / - (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); + mu_denom; } template @@ -3444,7 +3599,8 @@ lp_status_t barrier_solver_t::solve(f_t start_time, csc_matrix_t Q(lp.num_cols, 0, 0); if (lp.Q.n > 0) { create_Q(lp, Q); } - iteration_data_t data(lp, num_upper_bounds, Q, settings); + iteration_data_t data( + lp, num_upper_bounds, presolve_info.free_variable_indices, Q, settings); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Barrier solver halted\n"); return lp_status_t::CONCURRENT_LIMIT; diff --git a/cpp/src/barrier/dense_vector.hpp b/cpp/src/barrier/dense_vector.hpp index f73a9a5fce..5d6f2b12ec 100644 --- a/cpp/src/barrier/dense_vector.hpp +++ b/cpp/src/barrier/dense_vector.hpp @@ -184,6 +184,21 @@ class dense_vector_t : public std::vector { } } + void ensure_positive(f_t epsilon_adjust, const std::vector& mask) + { + f_t min_x = inf; + const i_t n = this->size(); + for (i_t i = 0; i < n; i++) { + if (mask[i]) { min_x = std::min(min_x, (*this)[i]); } + } + if (min_x <= 0.0) { + const f_t delta_x = -min_x + epsilon_adjust; + for (i_t i = 0; i < n; i++) { + if (mask[i]) { (*this)[i] += delta_x; } + } + } + } + void bound_away_from_zero(f_t epsilon_adjust) { const i_t n = this->size(); diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index c5ef847106..e3418bdc9c 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -1139,7 +1139,17 @@ i_t presolve(const lp_problem_t& original, problem.Q.check_matrix("Before free variable expansion"); - if (settings.barrier_presolve && free_variables > 0) { + // For QPs, keep free variables as-is rather than + // splitting x = v - w. The barrier solver handles them natively with a + // static regularizer on the diagonal instead of z/x complementarity terms. + if (settings.barrier_presolve && free_variables > 0 && problem.Q.n > 0) { + presolve_info.free_variable_indices.clear(); + for (i_t j = 0; j < problem.num_cols; j++) { + if (problem.lower[j] == -inf && problem.upper[j] == inf) { + presolve_info.free_variable_indices.push_back(j); + } + } + } else if (settings.barrier_presolve && free_variables > 0) { // We have a variable x_j: with -inf < x_j < inf // we create new variables v and w with 0 <= v, w and x_j = v - w // Constraints diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index d0e2d52812..2caf5673e0 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -153,6 +153,9 @@ struct presolve_info_t { // Variables that were negated to handle -inf < x_j <= u_j std::vector negated_variables; + + // Free variable indices for QP augmented system (not split, handled natively) + std::vector free_variable_indices; }; template