diff --git a/benchmarks/linear_programming/cuopt/benchmark_helper.hpp b/benchmarks/linear_programming/cuopt/benchmark_helper.hpp
index 2fb945feef..1232ed8e17 100644
--- a/benchmarks/linear_programming/cuopt/benchmark_helper.hpp
+++ b/benchmarks/linear_programming/cuopt/benchmark_helper.hpp
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh>
 #include <cuopt/linear_programming/pdlp/solver_solution.hpp>
 #include <cuopt/linear_programming/solve.hpp>
diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp
index 51d1b4a43d..9b79dff8af 100644
--- a/benchmarks/linear_programming/cuopt/run_mip.cpp
+++ b/benchmarks/linear_programming/cuopt/run_mip.cpp
@@ -10,7 +10,7 @@
 #include <cstdio>
 #include <cuopt/linear_programming/mip/solver_settings.hpp>
 #include <cuopt/linear_programming/mip/solver_solution.hpp>
-#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <mps_parser/parser.hpp>
 #include <utilities/logger.hpp>
diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu
index e76a77dba6..18a473d64e 100644
--- a/benchmarks/linear_programming/cuopt/run_pdlp.cu
+++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu
@@ -5,7 +5,7 @@
  */
 /* clang-format on */
 
-#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/pdlp/solver_solution.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <cuopt/linear_programming/solver_settings.hpp>
diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp
index ec185ba7c4..e9b1ee3719 100644
--- a/cpp/cuopt_cli.cpp
+++ b/cpp/cuopt_cli.cpp
@@ -6,9 +6,9 @@
 /* clang-format on */
 
 #include <cuopt/linear_programming/backend_selection.hpp>
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/mip/solver_settings.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/optimization_problem_utils.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <mps_parser/parser.hpp>
@@ -134,11 +134,11 @@ int run_single_file(const std::string& file_path,
   if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) {
     handle_ptr = std::make_unique<raft::handle_t>();
     problem_interface =
-      std::make_unique<cuopt::linear_programming::gpu_optimization_problem_t<int, double>>(
+      std::make_unique<cuopt::linear_programming::optimization_problem_t<int, double>>(
         handle_ptr.get());
   } else {
     problem_interface =
-      std::make_unique<cuopt::linear_programming::cpu_optimization_problem_t<int, double>>(nullptr);
+      std::make_unique<cuopt::linear_programming::cpu_optimization_problem_t<int, double>>();
   }
 
   // Populate the problem from MPS data model
diff --git a/cpp/include/cuopt/linear_programming/backend_selection.hpp b/cpp/include/cuopt/linear_programming/backend_selection.hpp
index 083e185c15..26bcd942a3 100644
--- a/cpp/include/cuopt/linear_programming/backend_selection.hpp
+++ b/cpp/include/cuopt/linear_programming/backend_selection.hpp
@@ -39,12 +39,6 @@ bool is_remote_execution_enabled();
  */
 execution_mode_t get_execution_mode();
 
-/**
- * @brief Check if GPU memory should be used for remote execution
- * @return true if CUOPT_USE_GPU_MEM_FOR_REMOTE is set to "true" or "1" (case-insensitive)
- */
-bool use_gpu_memory_for_remote();
-
 /**
  * @brief Check if CPU memory should be used for local execution (test mode)
  *
@@ -59,8 +53,9 @@ bool use_cpu_memory_for_local();
  * @brief Determine which memory backend to use based on execution mode
  *
  * Logic:
- *   - LOCAL execution -> GPU memory by default, CPU if CUOPT_USE_CPU_MEM_FOR_LOCAL=true (test mode)
- *   - REMOTE execution -> CPU memory by default, GPU if CUOPT_USE_GPU_MEM_FOR_REMOTE=true
+ *   - REMOTE execution -> always CPU memory
+ *   - LOCAL execution  -> GPU memory by default; CPU memory if
+ *                         CUOPT_USE_CPU_MEM_FOR_LOCAL=true (test mode)
  *
  * @return memory_backend_t::GPU or memory_backend_t::CPU
  */
diff --git a/cpp/include/cuopt/linear_programming/cpu_optimization_problem.hpp b/cpp/include/cuopt/linear_programming/cpu_optimization_problem.hpp
new file mode 100644
index 0000000000..009a8ce84e
--- /dev/null
+++ b/cpp/include/cuopt/linear_programming/cpu_optimization_problem.hpp
@@ -0,0 +1,201 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+
+#include <raft/core/handle.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace cuopt::linear_programming {
+
+// Forward declarations
+template <typename i_t, typename f_t>
+class optimization_problem_t;
+template <typename i_t, typename f_t>
+class pdlp_solver_settings_t;
+template <typename i_t, typename f_t>
+class mip_solver_settings_t;
+template <typename i_t, typename f_t>
+class lp_solution_interface_t;
+template <typename i_t, typename f_t>
+class mip_solution_interface_t;
+
+/**
+ * @brief CPU-based implementation of optimization_problem_interface_t.
+ *
+ * This implementation stores all data in CPU memory using std::vector.
+ * Only the host getters (methods suffixed _host, returning std::vector by value) are supported.
+ * Device getters (rmm::device_uvector references) throw: GPU memory access is not supported.
+ */
+template <typename i_t, typename f_t>
+class cpu_optimization_problem_t : public optimization_problem_interface_t<i_t, f_t> {
+ public:
+  cpu_optimization_problem_t();
+
+  // Setters
+  void set_maximize(bool maximize) override;
+  void set_csr_constraint_matrix(const f_t* A_values,
+                                 i_t size_values,
+                                 const i_t* A_indices,
+                                 i_t size_indices,
+                                 const i_t* A_offsets,
+                                 i_t size_offsets) override;
+  void set_constraint_bounds(const f_t* b, i_t size) override;
+  void set_objective_coefficients(const f_t* c, i_t size) override;
+  void set_objective_scaling_factor(f_t objective_scaling_factor) override;
+  void set_objective_offset(f_t objective_offset) override;
+  void set_quadratic_objective_matrix(const f_t* Q_values,
+                                      i_t size_values,
+                                      const i_t* Q_indices,
+                                      i_t size_indices,
+                                      const i_t* Q_offsets,
+                                      i_t size_offsets,
+                                      bool validate_positive_semi_definite = false) override;
+  void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size) override;
+  void set_variable_upper_bounds(const f_t* variable_upper_bounds, i_t size) override;
+  void set_variable_types(const var_t* variable_types, i_t size) override;
+  void set_problem_category(const problem_category_t& category) override;
+  void set_constraint_lower_bounds(const f_t* constraint_lower_bounds, i_t size) override;
+  void set_constraint_upper_bounds(const f_t* constraint_upper_bounds, i_t size) override;
+  void set_row_types(const char* row_types, i_t size) override;
+  void set_objective_name(const std::string& objective_name) override;
+  void set_problem_name(const std::string& problem_name) override;
+  void set_variable_names(const std::vector<std::string>& variable_names) override;
+  void set_row_names(const std::vector<std::string>& row_names) override;
+
+  // Getters; the rmm::device_uvector overloads throw (unsupported by the CPU implementation)
+  i_t get_n_variables() const override;
+  i_t get_n_constraints() const override;
+  i_t get_nnz() const override;
+  i_t get_n_integers() const override;
+  const rmm::device_uvector<f_t>& get_constraint_matrix_values() const override;
+  rmm::device_uvector<f_t>& get_constraint_matrix_values() override;
+  const rmm::device_uvector<i_t>& get_constraint_matrix_indices() const override;
+  rmm::device_uvector<i_t>& get_constraint_matrix_indices() override;
+  const rmm::device_uvector<i_t>& get_constraint_matrix_offsets() const override;
+  rmm::device_uvector<i_t>& get_constraint_matrix_offsets() override;
+  const rmm::device_uvector<f_t>& get_constraint_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_bounds() override;
+  const rmm::device_uvector<f_t>& get_objective_coefficients() const override;
+  rmm::device_uvector<f_t>& get_objective_coefficients() override;
+  f_t get_objective_scaling_factor() const override;
+  f_t get_objective_offset() const override;
+  const rmm::device_uvector<f_t>& get_variable_lower_bounds() const override;
+  rmm::device_uvector<f_t>& get_variable_lower_bounds() override;
+  const rmm::device_uvector<f_t>& get_variable_upper_bounds() const override;
+  rmm::device_uvector<f_t>& get_variable_upper_bounds() override;
+  const rmm::device_uvector<f_t>& get_constraint_lower_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_lower_bounds() override;
+  const rmm::device_uvector<f_t>& get_constraint_upper_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_upper_bounds() override;
+  const rmm::device_uvector<char>& get_row_types() const override;
+  const rmm::device_uvector<var_t>& get_variable_types() const override;
+  bool get_sense() const override;
+  bool empty() const override;
+  std::string get_objective_name() const override;
+  std::string get_problem_name() const override;
+  problem_category_t get_problem_category() const override;
+  const std::vector<std::string>& get_variable_names() const override;
+  const std::vector<std::string>& get_row_names() const override;
+  const std::vector<i_t>& get_quadratic_objective_offsets() const override;
+  const std::vector<i_t>& get_quadratic_objective_indices() const override;
+  const std::vector<f_t>& get_quadratic_objective_values() const override;
+  bool has_quadratic_objective() const override;
+
+  // Host getters - the supported way to read array data from the CPU implementation
+  std::vector<f_t> get_constraint_matrix_values_host() const override;
+  std::vector<i_t> get_constraint_matrix_indices_host() const override;
+  std::vector<i_t> get_constraint_matrix_offsets_host() const override;
+  std::vector<f_t> get_constraint_bounds_host() const override;
+  std::vector<f_t> get_objective_coefficients_host() const override;
+  std::vector<f_t> get_variable_lower_bounds_host() const override;
+  std::vector<f_t> get_variable_upper_bounds_host() const override;
+  std::vector<f_t> get_constraint_lower_bounds_host() const override;
+  std::vector<f_t> get_constraint_upper_bounds_host() const override;
+  std::vector<char> get_row_types_host() const override;
+  std::vector<var_t> get_variable_types_host() const override;
+
+  /**
+   * @brief Convert this CPU optimization problem to an optimization_problem_t
+   *        by copying CPU data to GPU (requires GPU memory transfer).
+   * @note The default argument is nullptr, which is documented to throw: pass a valid handle.
+   * @param handle_ptr RAFT handle with CUDA resources for GPU memory allocation.
+   * @return unique_ptr to new optimization_problem_t with all data copied to GPU
+   * @throws std::runtime_error if handle_ptr is null
+   */
+  std::unique_ptr<optimization_problem_t<i_t, f_t>> to_optimization_problem(
+    raft::handle_t const* handle_ptr = nullptr) override;
+
+  /**
+   * @brief Write the optimization problem to an MPS file.
+   * @param[in] mps_file_path Path to the output MPS file
+   */
+  void write_to_mps(const std::string& mps_file_path) override;
+
+  /**
+   * @brief Check if this problem is equivalent to another problem.
+   * @param[in] other The other optimization problem to compare against
+   * @return true if the problems are equivalent (up to permutation of variables/constraints)
+   */
+  bool is_equivalent(const optimization_problem_interface_t<i_t, f_t>& other) const override;
+
+  // C API support: Copy to host (polymorphic)
+  void copy_objective_coefficients_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_matrix_to_host(f_t* values,
+                                      i_t* indices,
+                                      i_t* offsets,
+                                      i_t num_values,
+                                      i_t num_indices,
+                                      i_t num_offsets) const override;
+  void copy_row_types_to_host(char* output, i_t size) const override;
+  void copy_constraint_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_lower_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_upper_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_lower_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_upper_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_types_to_host(var_t* output, i_t size) const override;
+
+ private:
+  problem_category_t problem_category_ = problem_category_t::LP;
+  bool maximize_{false};
+  i_t n_vars_{0};
+  i_t n_constraints_{0};
+
+  // CPU memory storage
+  std::vector<f_t> A_;
+  std::vector<i_t> A_indices_;
+  std::vector<i_t> A_offsets_;
+  std::vector<f_t> b_;
+  std::vector<f_t> c_;
+  f_t objective_scaling_factor_{1};
+  f_t objective_offset_{0};
+
+  std::vector<i_t> Q_offsets_;
+  std::vector<i_t> Q_indices_;
+  std::vector<f_t> Q_values_;
+
+  std::vector<f_t> variable_lower_bounds_;
+  std::vector<f_t> variable_upper_bounds_;
+  std::vector<f_t> constraint_lower_bounds_;
+  std::vector<f_t> constraint_upper_bounds_;
+  std::vector<char> row_types_;
+  std::vector<var_t> variable_types_;
+
+  std::string objective_name_;
+  std::string problem_name_;
+  std::vector<std::string> var_names_{};
+  std::vector<std::string> row_names_{};
+};
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp b/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp
index b9bac23a8b..0fad7059aa 100644
--- a/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp
+++ b/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp
@@ -238,59 +238,6 @@ class cpu_lp_solution_t : public lp_solution_interface_t<i_t, f_t> {
     return pdlp_warm_start_data_.iterations_since_last_restart_;
   }
 
-  /**
-   * @brief Convert CPU solution to GPU solution
-   * Copies data from host (std::vector) to device (rmm::device_uvector)
-   */
-  optimization_problem_solution_t<i_t, f_t> to_gpu_solution(
-    rmm::cuda_stream_view stream_view) override
-  {
-    // Create device vectors and copy data from host
-    rmm::device_uvector<f_t> primal_device(primal_solution_.size(), stream_view);
-    rmm::device_uvector<f_t> dual_device(dual_solution_.size(), stream_view);
-    rmm::device_uvector<f_t> reduced_cost_device(reduced_cost_.size(), stream_view);
-
-    if (!primal_solution_.empty()) {
-      raft::copy(
-        primal_device.data(), primal_solution_.data(), primal_solution_.size(), stream_view);
-    }
-    if (!dual_solution_.empty()) {
-      raft::copy(dual_device.data(), dual_solution_.data(), dual_solution_.size(), stream_view);
-    }
-    if (!reduced_cost_.empty()) {
-      raft::copy(
-        reduced_cost_device.data(), reduced_cost_.data(), reduced_cost_.size(), stream_view);
-    }
-
-    using additional_info_t =
-      typename optimization_problem_solution_t<i_t, f_t>::additional_termination_information_t;
-    std::vector<additional_info_t> termination_stats(1);
-    termination_stats[0].primal_objective      = primal_objective_;
-    termination_stats[0].dual_objective        = dual_objective_;
-    termination_stats[0].solve_time            = solve_time_;
-    termination_stats[0].l2_primal_residual    = l2_primal_residual_;
-    termination_stats[0].l2_dual_residual      = l2_dual_residual_;
-    termination_stats[0].gap                   = gap_;
-    termination_stats[0].number_of_steps_taken = num_iterations_;
-    termination_stats[0].solved_by_pdlp        = solved_by_pdlp_;
-
-    std::vector<pdlp_termination_status_t> termination_status_vec = {termination_status_};
-
-    // Convert CPU warmstart to GPU warmstart
-    auto gpu_warmstart = convert_to_gpu_warmstart(pdlp_warm_start_data_, stream_view);
-
-    // Create GPU solution
-    return optimization_problem_solution_t<i_t, f_t>(primal_device,
-                                                     dual_device,
-                                                     reduced_cost_device,
-                                                     std::move(gpu_warmstart),
-                                                     "",  // objective_name
-                                                     {},  // var_names
-                                                     {},  // row_names
-                                                     std::move(termination_stats),
-                                                     std::move(termination_status_vec));
-  }
-
   /**
    * @brief Convert to CPU-backed linear_programming_ret_t struct for Python/Cython
    * Populates the cpu_solutions_t variant.  Moves std::vector data with zero-copy.
@@ -414,39 +361,6 @@ class cpu_mip_solution_t : public mip_solution_interface_t<i_t, f_t> {
 
   i_t get_num_simplex_iterations() const override { return num_simplex_iterations_; }
 
-  /**
-   * @brief Convert CPU solution to GPU solution
-   * Copies data from host (std::vector) to device (rmm::device_uvector)
-   */
-  mip_solution_t<i_t, f_t> to_gpu_solution(rmm::cuda_stream_view stream_view) override
-  {
-    // Create device vector and copy data from host
-    rmm::device_uvector<f_t> solution_device(solution_.size(), stream_view);
-
-    if (!solution_.empty()) {
-      raft::copy(solution_device.data(), solution_.data(), solution_.size(), stream_view);
-    }
-
-    // Create solver stats
-    solver_stats_t<i_t, f_t> stats;
-    stats.total_solve_time       = total_solve_time_;
-    stats.presolve_time          = presolve_time_;
-    stats.solution_bound         = solution_bound_;
-    stats.num_nodes              = num_nodes_;
-    stats.num_simplex_iterations = num_simplex_iterations_;
-
-    // Create GPU solution
-    return mip_solution_t<i_t, f_t>(std::move(solution_device),
-                                    {},  // var_names
-                                    objective_,
-                                    mip_gap_,
-                                    termination_status_,
-                                    max_constraint_violation_,
-                                    max_int_violation_,
-                                    max_variable_bound_violation_,
-                                    stats);
-  }
-
   /**
    * @brief Convert to CPU-backed mip_ret_t struct for Python/Cython
    * Populates the cpu_buffer variant.  Moves std::vector data with zero-copy.
diff --git a/cpp/include/cuopt/linear_programming/optimization_problem.hpp b/cpp/include/cuopt/linear_programming/optimization_problem.hpp
index d0731f3aa9..d0f624ebdf 100644
--- a/cpp/include/cuopt/linear_programming/optimization_problem.hpp
+++ b/cpp/include/cuopt/linear_programming/optimization_problem.hpp
@@ -7,31 +7,40 @@
 
 #pragma once
 
-#include <cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp>
-
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/utilities/internals.hpp>
-#include <mps_parser/data_model_view.hpp>
 
 #include <raft/core/device_span.hpp>
 #include <raft/core/handle.hpp>
-
 #include <rmm/device_uvector.hpp>
 
 #include <cstdint>
+#include <memory>
 #include <string>
-#include <type_traits>
 #include <vector>
 
 namespace cuopt::linear_programming {
 
-enum class var_t { CONTINUOUS = 0, INTEGER };
-enum class problem_category_t : int8_t { LP = 0, MIP = 1, IP = 2 };
+// Forward declarations
+template <typename i_t, typename f_t>
+class pdlp_solver_settings_t;
+template <typename i_t, typename f_t>
+class mip_solver_settings_t;
+template <typename i_t, typename f_t>
+class lp_solution_interface_t;
+template <typename i_t, typename f_t>
+class mip_solution_interface_t;
 
 /**
  * @brief A representation of a linear programming (LP) optimization problem
  *
+ * @tparam i_t  Integer type for indices
  * @tparam f_t  Data type of the variables and their weights in the equations
  *
+ * This implementation stores all data in GPU memory using rmm::device_uvector.
+ * It implements both device getters (returning rmm::device_uvector references)
+ * and host getters (returning std::vector by copying from GPU to CPU).
+ *
  * This structure stores all the information necessary to represent the
  * following LP:
  *
@@ -56,7 +65,7 @@ enum class problem_category_t : int8_t { LP = 0, MIP = 1, IP = 2 };
  * `set_objective_offset()` methods.
  */
 template <typename i_t, typename f_t>
-class optimization_problem_t {
+class optimization_problem_t : public optimization_problem_interface_t<i_t, f_t> {
  public:
   static_assert(std::is_integral<i_t>::value,
                 "'optimization_problem_t' accepts only integer types for indexes");
@@ -102,119 +111,82 @@ class optimization_problem_t {
     raft::device_span<const f_t> constraint_upper_bounds;
   };  // struct view_t
 
-  optimization_problem_t(raft::handle_t const* handle_ptr);
+  explicit optimization_problem_t(raft::handle_t const* handle_ptr);
   optimization_problem_t(const optimization_problem_t<i_t, f_t>& other);
-
-  /**
-   * @brief Check if this optimization problem is equivalent to another.
-   *
-   * Two problems are considered equivalent if they represent the same mathematical
-   * optimization problem, potentially with variables and constraints in a different order.
-   * The mapping between problems is determined by matching variable names and row names.
-   * Essentially checks for graph isomorphism given label mappings.
-   *
-   * @param other The other optimization problem to compare against.
-   * @return true if the problems are equivalent (up to permutation of variables/constraints),
-   *         false otherwise.
-   */
-  bool is_equivalent(const optimization_problem_t<i_t, f_t>& other) const;
+  optimization_problem_t(optimization_problem_t<i_t, f_t>&&)            = default;
+  optimization_problem_t& operator=(optimization_problem_t<i_t, f_t>&&) = default;
 
   std::vector<internals::base_solution_callback_t*> mip_callbacks_;
 
+  // ============================================================================
+  // Setters
+  // ============================================================================
+
   /**
    * @brief Set the sense of optimization to maximize.
-   * @note Setting before calling the solver is optional, default value if false
-   * (minimize).
-   *
-   * @param[in] maximize true means to maximize the objective function, else
-   * minimize.
+   * @note Setting before calling the solver is optional, default is false (minimize).
+   * @param[in] maximize true means to maximize the objective function, else minimize.
    */
-  void set_maximize(bool maximize);
-  /**
-   * @brief Set the constraint matrix (A) in CSR format. For more information
-   about CSR checkout:
-   * https://docs.nvidia.com/cuda/cusparse/index.html#compressed-sparse-row-csr
+  void set_maximize(bool maximize) override;
 
+  /**
+   * @brief Set the constraint matrix (A) in CSR format.
    * @note Setting before calling the solver is mandatory.
-   *
-   * @throws cuopt::logic_error when an error occurs.
-   * @param[in] A_values Values of the CSR representation of the constraint
-   matrix as a device or host memory pointer to a floating point array of size
-   size_values.
-   * cuOpt copies this data. Copy happens on the stream of the raft:handler
-   passed to the problem.
-   * @param size_values Size of the A_values array.
-   * @param[in] A_indices Indices of the CSR representation of the constraint
-   matrix as a device or host memory pointer to an integer array of size
-   size_indices.
-   * cuOpt copies this data. Copy happens on the stream of the raft:handler
-   passed to the problem.
-   * @param size_indices Size of the A_indices array.
-   * @param[in] A_offsets Offsets of the CSR representation of the constraint
-   matrix as a device or host memory pointer to a integer array of size
-   size_offsets.
-   * cuOpt copies this data. Copy happens on the stream of the raft:handler
-   passed to the problem.
-   * @param size_offsets Size of the A_offsets array.
+   * Data is copied to GPU memory on the stream of the RAFT handle passed to the problem.
+   * @param[in] A_values Values of the CSR representation (device or host pointer)
+   * @param size_values Size of the A_values array
+   * @param[in] A_indices Indices of the CSR representation (device or host pointer)
+   * @param size_indices Size of the A_indices array
+   * @param[in] A_offsets Offsets of the CSR representation (device or host pointer)
+   * @param size_offsets Size of the A_offsets array
    */
   void set_csr_constraint_matrix(const f_t* A_values,
                                  i_t size_values,
                                  const i_t* A_indices,
                                  i_t size_indices,
                                  const i_t* A_offsets,
-                                 i_t size_offsets);
+                                 i_t size_offsets) override;
 
   /**
    * @brief Set the constraint bounds (b / right-hand side) array.
    * @note Setting before calling the solver is mandatory.
-   *
-   * @param[in] b Device or host memory pointer to a floating point array of
-   * size size. cuOpt copies this data. Copy happens on the stream of the
-   * raft:handler passed to the problem.
+   * @param[in] b Device or host memory pointer to a floating point array of size size.
    * @param size Size of the b array.
    */
-  void set_constraint_bounds(const f_t* b, i_t size);
+  void set_constraint_bounds(const f_t* b, i_t size) override;
+
   /**
    * @brief Set the objective coefficients (c) array.
    * @note Setting before calling the solver is mandatory.
-   *
-   * @param[in] c Device or host memory pointer to a floating point array of
-   * size size. cuOpt copies this data. Copy happens on the stream of the
-   * raft:handler passed to the problem.
+   * @param[in] c Device or host memory pointer to a floating point array of size size.
    * @param size Size of the c array.
    */
-  void set_objective_coefficients(const f_t* c, i_t size);
+  void set_objective_coefficients(const f_t* c, i_t size) override;
+
   /**
-   * @brief Set the scaling factor of the objective function (scaling_factor *
-   * objective_value).
-   * @note Setting before calling the solver is optional, default value if 1.
-   *
+   * @brief Set the scaling factor of the objective function (scaling_factor * objective_value).
+   * @note Setting before calling the solver is optional, default value is 1.
    * @param objective_scaling_factor Objective scaling factor value.
    */
-  void set_objective_scaling_factor(f_t objective_scaling_factor);
+  void set_objective_scaling_factor(f_t objective_scaling_factor) override;
+
   /**
-   * @brief Set the offset of the objective function (objective_offset +
-   * objective_value).
-   * @note Setting before calling the solver is optional, default value if 0.
-   *
+   * @brief Set the offset of the objective function (objective_offset + objective_value).
+   * @note Setting before calling the solver is optional, default value is 0.
    * @param objective_offset Objective offset value.
    */
-  void set_objective_offset(f_t objective_offset);
+  void set_objective_offset(f_t objective_offset) override;
 
   /**
-   * @brief Set the quadratic objective matrix (Q) in CSR format for QPS files.
-   *
-   * @note This is used for quadratic programming problems where the objective
-   * function contains quadratic terms: x^T * Q * x + c^T * x
-   *
-   * @param[in] Q_values Values of the CSR representation of the quadratic objective matrix
+   * @brief Set the quadratic objective matrix (Q) in CSR format.
+   * @note Used for quadratic programming: objective is x^T * Q * x + c^T * x
+   * @param[in] Q_values Values of the CSR representation
    * @param size_values Size of the Q_values array
-   * @param[in] Q_indices Indices of the CSR representation of the quadratic objective matrix
+   * @param[in] Q_indices Indices of the CSR representation
    * @param size_indices Size of the Q_indices array
-   * @param[in] Q_offsets Offsets of the CSR representation of the quadratic objective matrix
+   * @param[in] Q_offsets Offsets of the CSR representation
    * @param size_offsets Size of the Q_offsets array
-   * @param validate_positive_semi_definite Whether to validate if the matrix is positive semi
-   * definite
+   * @param validate_positive_semi_definite Whether to validate PSD property
    */
   void set_quadratic_objective_matrix(const f_t* Q_values,
                                       i_t size_values,
@@ -222,182 +194,151 @@ class optimization_problem_t {
                                       i_t size_indices,
                                       const i_t* Q_offsets,
                                       i_t size_offsets,
-                                      bool validate_positive_semi_definite = false);
+                                      bool validate_positive_semi_definite = false) override;
+
+  /** @copydoc optimization_problem_interface_t::set_variable_lower_bounds */
+  void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size) override;
+  /** @copydoc optimization_problem_interface_t::set_variable_upper_bounds */
+  void set_variable_upper_bounds(const f_t* variable_upper_bounds, i_t size) override;
+  /** @copydoc optimization_problem_interface_t::set_variable_types */
+  void set_variable_types(const var_t* variable_types, i_t size) override;
+  /** @copydoc optimization_problem_interface_t::set_problem_category */
+  void set_problem_category(const problem_category_t& category) override;
+  /** @copydoc optimization_problem_interface_t::set_constraint_lower_bounds */
+  void set_constraint_lower_bounds(const f_t* constraint_lower_bounds, i_t size) override;
+  /** @copydoc optimization_problem_interface_t::set_constraint_upper_bounds */
+  void set_constraint_upper_bounds(const f_t* constraint_upper_bounds, i_t size) override;
+  /** @copydoc optimization_problem_interface_t::set_row_types */
+  void set_row_types(const char* row_types, i_t size) override;
+  /** @copydoc optimization_problem_interface_t::set_objective_name */
+  void set_objective_name(const std::string& objective_name) override;
+  /** @copydoc optimization_problem_interface_t::set_problem_name */
+  void set_problem_name(const std::string& problem_name) override;
+  /** @copydoc optimization_problem_interface_t::set_variable_names */
+  void set_variable_names(const std::vector<std::string>& variable_names) override;
+  /** @copydoc optimization_problem_interface_t::set_row_names */
+  void set_row_names(const std::vector<std::string>& row_names) override;
+
+  // ============================================================================
+  // Getters (problem sizes/metadata and device-memory accessors)
+  // ============================================================================
+
+  i_t get_n_variables() const override;
+  i_t get_n_constraints() const override;
+  i_t get_nnz() const override;
+  i_t get_n_integers() const override;
+  const rmm::device_uvector<f_t>& get_constraint_matrix_values() const override;
+  rmm::device_uvector<f_t>& get_constraint_matrix_values() override;
+  const rmm::device_uvector<i_t>& get_constraint_matrix_indices() const override;
+  rmm::device_uvector<i_t>& get_constraint_matrix_indices() override;
+  const rmm::device_uvector<i_t>& get_constraint_matrix_offsets() const override;
+  rmm::device_uvector<i_t>& get_constraint_matrix_offsets() override;
+  const rmm::device_uvector<f_t>& get_constraint_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_bounds() override;
+  const rmm::device_uvector<f_t>& get_objective_coefficients() const override;
+  rmm::device_uvector<f_t>& get_objective_coefficients() override;
+  f_t get_objective_scaling_factor() const override;
+  f_t get_objective_offset() const override;
+  const rmm::device_uvector<f_t>& get_variable_lower_bounds() const override;
+  rmm::device_uvector<f_t>& get_variable_lower_bounds() override;
+  const rmm::device_uvector<f_t>& get_variable_upper_bounds() const override;
+  rmm::device_uvector<f_t>& get_variable_upper_bounds() override;
+  const rmm::device_uvector<f_t>& get_constraint_lower_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_lower_bounds() override;
+  const rmm::device_uvector<f_t>& get_constraint_upper_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_upper_bounds() override;
+  const rmm::device_uvector<char>& get_row_types() const override;
+  const rmm::device_uvector<var_t>& get_variable_types() const override;
+  bool get_sense() const override;
+  bool empty() const override;
+  std::string get_objective_name() const override;
+  std::string get_problem_name() const override;
+  problem_category_t get_problem_category() const override;
+  const std::vector<std::string>& get_variable_names() const override;
+  const std::vector<std::string>& get_row_names() const override;
+  const std::vector<i_t>& get_quadratic_objective_offsets() const override;
+  const std::vector<i_t>& get_quadratic_objective_indices() const override;
+  const std::vector<f_t>& get_quadratic_objective_values() const override;
+  bool has_quadratic_objective() const override;
+
+  // ============================================================================
+  // Host getters
+  // ============================================================================
+
+  std::vector<f_t> get_constraint_matrix_values_host() const override;
+  std::vector<i_t> get_constraint_matrix_indices_host() const override;
+  std::vector<i_t> get_constraint_matrix_offsets_host() const override;
+  std::vector<f_t> get_constraint_bounds_host() const override;
+  std::vector<f_t> get_objective_coefficients_host() const override;
+  std::vector<f_t> get_variable_lower_bounds_host() const override;
+  std::vector<f_t> get_variable_upper_bounds_host() const override;
+  std::vector<f_t> get_constraint_lower_bounds_host() const override;
+  std::vector<f_t> get_constraint_upper_bounds_host() const override;
+  std::vector<char> get_row_types_host() const override;
+  std::vector<var_t> get_variable_types_host() const override;
+
+  // ============================================================================
+  // File I/O
+  // ============================================================================
 
   /**
-   * @brief Get the quadratic objective matrix offsets
-   * @return const reference to the Q_offsets vector
+   * @brief Write the optimization problem to an MPS file.
+   * @param[in] mps_file_path Path to the output MPS file
    */
-  const std::vector<i_t>& get_quadratic_objective_offsets() const;
+  void write_to_mps(const std::string& mps_file_path) override;
 
-  /**
-   * @brief Get the quadratic objective matrix indices
-   * @return const reference to the Q_indices vector
-   */
-  const std::vector<i_t>& get_quadratic_objective_indices() const;
+  /* Print scaling information */
+  void print_scaling_information() const;
 
-  /**
-   * @brief Get the quadratic objective matrix values
-   * @return const reference to the Q_values vector
-   */
-  const std::vector<f_t>& get_quadratic_objective_values() const;
+  // ============================================================================
+  // Comparison
+  // ============================================================================
 
   /**
-   * @brief Set the variables (x) lower bounds.
-   * @note Setting before calling the solver is optional, default value for all
-   * is 0.
-   *
-   * @param[in] variable_lower_bounds Device or host memory pointer to a
-   * floating point array of size size. cuOpt copies this data. Copy happens on
-   * the stream of the raft:handler passed to the problem.
-   * @param size Size of the variable_lower_bounds array
+   * @brief Check if this problem is equivalent to another optimization_problem_t.
+   * @param[in] other The other optimization problem to compare against
+   * @return true if the problems are equivalent (up to permutation of variables/constraints)
    */
-  void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size);
-  /**
-   * @brief Set the variables (x) upper bounds.
-   * @note Setting before calling the solver is optional, default value for all
-   * is +infinity.
-   *
-   * @param[in] variable_upper_bounds Device or host memory pointer to a
-   * floating point array of size size. cuOpt copies this data. Copy happens on
-   * the stream of the raft:handler passed to the problem.
-   * @param size Size of the variable_upper_bounds array.
-   */
-  void set_variable_upper_bounds(const f_t* variable_upper_bounds, i_t size);
-  /**
-   * @brief Set the variables types.
-   * @note Setting before calling the solver is optional, default value for all
-   * is CONTINUOUS.
-   *
-   * @param[in] variable_types Device or host memory pointer to a var_t array.
-   * cuOpt copies this data. Copy happens on the stream of the raft:handler
-   * passed to the problem.
-   * @param size Size of the variable_types array.
-   */
-  void set_variable_types(const var_t* variable_types, i_t size);
-  void set_problem_category(const problem_category_t& category);
-  /**
-   * @brief Set the constraints lower bounds.
-   * @note Setting before calling the solver is optional if you set the row
-   * type, else it's mandatory along with the upper bounds.
-   *
-   * @param[in] constraint_lower_bounds Device or host memory pointer to a
-   * floating point array of size size. cuOpt copies this data. Copy happens on
-   * the stream of the raft:handler passed to the problem.
-   * @param size Size of the constraint_lower_bounds array
-   */
-  void set_constraint_lower_bounds(const f_t* constraint_lower_bounds, i_t size);
-  /**
-   * @brief Set the constraints upper bounds.
-   * @note Setting before calling the solver is optional if you set the row
-   * type, else it's mandatory along with the lower bounds. If both are set,
-   * priority goes to set_constraints.
-   *
-   * @param[in] constraint_upper_bounds Device or host memory pointer to a
-   * floating point array of size size. cuOpt copies this data. Copy happens on
-   * the stream of the raft:handler passed to the problem.
-   * @param size Size of the constraint_upper_bounds array
-   */
-  void set_constraint_upper_bounds(const f_t* constraint_upper_bounds, i_t size);
+  bool is_equivalent(const optimization_problem_t<i_t, f_t>& other) const;
 
   /**
-   * @brief Set the type of each row (constraint). Possible values are:
-   * 'E' for equality ( = ): lower & upper constrains bound equal to b
-   * 'L' for less-than ( <= ): lower constrains bound equal to -infinity, upper
-   * constrains bound equal to b 'G' for greater-than ( >= ): lower constrains
-   * bound equal to b, upper constrains bound equal to +infinity
-   * @note Setting before calling the solver is optional if you set the
-   * constraint lower and upper bounds, else it's mandatory If both are set,
-   * priority goes to set_constraints.
-   *
-   * @param[in] row_types Device or host memory pointer to a character array of
-   * size size.
-   * cuOpt copies this data. Copy happens on the stream of the raft:handler
-   * passed to the problem.
-   * @param size Size of the row_types array
+   * @brief Check if this problem is equivalent to another problem (via interface).
+   * @param[in] other The other optimization problem to compare against
+   * @return true if the problems are equivalent (up to permutation of variables/constraints)
    */
-  void set_row_types(const char* row_types, i_t size);
+  bool is_equivalent(const optimization_problem_interface_t<i_t, f_t>& other) const override;
 
-  /**
-   * @brief Set the name of the objective function.
-   * @note Setting before calling the solver is optional. Value is only used for
-   * file generation of the solution.
-   *
-   * @param[in] objective_name Objective name value.
-   */
-  void set_objective_name(const std::string& objective_name);
-  /**
-   * @brief Set the problem name.
-   * @note Setting before calling the solver is optional.
-   *
-   * @param[in] problem_name Problem name value.
-   */
-  void set_problem_name(const std::string& problem_name);
-  /**
-   * @brief Set the variables names.
-   * @note Setting before calling the solver is optional. Value is only used for
-   * file generation of the solution.
-   *
-   * @param[in] variable_names Variable names values.
-   */
-  void set_variable_names(const std::vector<std::string>& variables_names);
-  /**
-   * @brief Set the row names.
-   * @note Setting before calling the solver is optional. Value is only used for
-   * file generation of the solution.
-   *
-   * @param[in] row_names Row names value.
-   */
-  void set_row_names(const std::vector<std::string>& row_names);
+  // ============================================================================
+  // Conversion
+  // ============================================================================
 
   /**
-   * @brief Write the problem to an MPS formatted file
-   *
-   * @param[in] mps_file_path Path to the MPS file to write
+   * @brief Returns nullptr since this is already a GPU problem.
+   * @return nullptr
    */
-  void write_to_mps(const std::string& mps_file_path);
+  std::unique_ptr<optimization_problem_t<i_t, f_t>> to_optimization_problem(
+    raft::handle_t const* handle_ptr = nullptr) override;
+
+  // ============================================================================
+  // C API support: Copy to host (polymorphic)
+  // ============================================================================
+
+  void copy_objective_coefficients_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_matrix_to_host(f_t* values,
+                                      i_t* indices,
+                                      i_t* offsets,
+                                      i_t num_values,
+                                      i_t num_indices,
+                                      i_t num_offsets) const override;
+  void copy_row_types_to_host(char* output, i_t size) const override;
+  void copy_constraint_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_lower_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_upper_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_lower_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_upper_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_types_to_host(var_t* output, i_t size) const override;
 
-  /* Print scaling information */
-  void print_scaling_information() const;
-
-  i_t get_n_variables() const;
-  i_t get_n_constraints() const;
-  i_t get_nnz() const;
-  i_t get_n_integers() const;
   raft::handle_t const* get_handle_ptr() const noexcept;
-  const rmm::device_uvector<f_t>& get_constraint_matrix_values() const;
-  rmm::device_uvector<f_t>& get_constraint_matrix_values();
-  const rmm::device_uvector<i_t>& get_constraint_matrix_indices() const;
-  rmm::device_uvector<i_t>& get_constraint_matrix_indices();
-  const rmm::device_uvector<i_t>& get_constraint_matrix_offsets() const;
-  rmm::device_uvector<i_t>& get_constraint_matrix_offsets();
-  const rmm::device_uvector<f_t>& get_constraint_bounds() const;
-  rmm::device_uvector<f_t>& get_constraint_bounds();
-  const rmm::device_uvector<f_t>& get_objective_coefficients() const;
-  rmm::device_uvector<f_t>& get_objective_coefficients();
-  f_t get_objective_scaling_factor() const;
-  f_t get_objective_offset() const;
-  const rmm::device_uvector<f_t>& get_variable_lower_bounds() const;
-  const rmm::device_uvector<f_t>& get_variable_upper_bounds() const;
-  rmm::device_uvector<f_t>& get_variable_lower_bounds();
-  rmm::device_uvector<f_t>& get_variable_upper_bounds();
-  const rmm::device_uvector<f_t>& get_constraint_lower_bounds() const;
-  const rmm::device_uvector<f_t>& get_constraint_upper_bounds() const;
-  rmm::device_uvector<f_t>& get_constraint_lower_bounds();
-  rmm::device_uvector<f_t>& get_constraint_upper_bounds();
-  const rmm::device_uvector<char>& get_row_types() const;
-  const rmm::device_uvector<var_t>& get_variable_types() const;
-  bool get_sense() const;
-  bool empty() const;
-
-  std::string get_objective_name() const;
-  std::string get_problem_name() const;
-  // Unless an integer variable is added, by default it is LP
-  problem_category_t get_problem_category() const;
-  const std::vector<std::string>& get_variable_names() const;
-  const std::vector<std::string>& get_row_names() const;
-
-  bool has_quadratic_objective() const;
 
   /**
    * @brief Gets the device-side view (with raw pointers), for ease of access
@@ -406,66 +347,38 @@ class optimization_problem_t {
   view_t view() const;
 
  private:
-  void add_row_related_vars_to_row(std::vector<i_t>& indices,
-                                   std::vector<f_t>& values,
-                                   std::vector<i_t>& A_offsets,
-                                   std::vector<i_t>& A_indices,
-                                   std::vector<f_t>& A_values);
-
-  // Pointer to library handle (RAFT) containing hardware resources information
   raft::handle_t const* handle_ptr_{nullptr};
   rmm::cuda_stream_view stream_view_;
 
-  /** problem classification */
   problem_category_t problem_category_ = problem_category_t::LP;
-  /** whether to maximize or minimize the objective function */
-  bool maximize_;
-  /** number of variables */
-  i_t n_vars_;
-  /** number of constraints in the LP representation */
-  i_t n_constraints_;
-  /**
-   * the constraint matrix itself in the CSR format
-   * @{
-   */
+  bool maximize_{false};
+  i_t n_vars_{0};
+  i_t n_constraints_{0};
+
+  // GPU memory storage
   rmm::device_uvector<f_t> A_;
   rmm::device_uvector<i_t> A_indices_;
   rmm::device_uvector<i_t> A_offsets_;
-  /** @} */
-  /** RHS of the constraints */
   rmm::device_uvector<f_t> b_;
-  /** weights in the objective function */
   rmm::device_uvector<f_t> c_;
-  /** scale factor of the objective function */
   f_t objective_scaling_factor_{1};
-  /** offset of the objective function */
   f_t objective_offset_{0};
 
-  /** Quadratic objective matrix in CSR format (for (1/2) * x^T * Q * x term) */
   std::vector<i_t> Q_offsets_;
   std::vector<i_t> Q_indices_;
   std::vector<f_t> Q_values_;
 
-  /** lower bounds of the variables (primal part) */
   rmm::device_uvector<f_t> variable_lower_bounds_;
-  /** upper bounds of the variables (primal part) */
   rmm::device_uvector<f_t> variable_upper_bounds_;
-  /** lower bounds of the constraint (dual part) */
   rmm::device_uvector<f_t> constraint_lower_bounds_;
-  /** upper bounds of the constraint (dual part) */
   rmm::device_uvector<f_t> constraint_upper_bounds_;
-  /** Type of each constraint */
   rmm::device_uvector<char> row_types_;
-  /** Type of each variable */
   rmm::device_uvector<var_t> variable_types_;
-  /** name of the objective (only a single objective is currently allowed) */
+
   std::string objective_name_;
-  /** name of the problem  */
   std::string problem_name_;
-  /** names of each of the variables in the OP */
   std::vector<std::string> var_names_{};
-  /** names of each of the rows (aka constraints or objective) in the OP */
   std::vector<std::string> row_names_{};
-};  // class optimization_problem_t
+};
 
 }  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp
index f6758a99f1..767e62e746 100644
--- a/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp
+++ b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp
@@ -7,8 +7,9 @@
 
 #pragma once
 
-#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/utilities/internals.hpp>
 
+#include <raft/core/device_span.hpp>
 #include <raft/core/handle.hpp>
 #include <rmm/device_uvector.hpp>
 
@@ -19,7 +20,13 @@
 
 namespace cuopt::linear_programming {
 
-// Forward declarations
+enum class var_t { CONTINUOUS = 0, INTEGER };
+enum class problem_category_t : int8_t { LP = 0, MIP = 1, IP = 2 };
+
+template <typename i_t, typename f_t>
+class optimization_problem_t;
+template <typename i_t, typename f_t>
+class cpu_optimization_problem_t;
 template <typename i_t, typename f_t>
 class pdlp_solver_settings_t;
 template <typename i_t, typename f_t>
@@ -237,17 +244,7 @@ class optimization_problem_interface_t {
   virtual bool has_quadratic_objective() const                                  = 0;
 
   // ============================================================================
-  // Conversion
-  // ============================================================================
-
-  /**
-   * @brief Convert to optimization_problem_t (moves data to GPU if needed)
-   * @return optimization_problem_t<i_t, f_t> GPU-backed optimization problem
-   */
-  virtual optimization_problem_t<i_t, f_t> to_optimization_problem() = 0;
-
-  // ============================================================================
-  // Getters - Host memory (CPU) - NEW
+  // Getters - Host memory (CPU)
   // ============================================================================
 
   /**
@@ -337,32 +334,6 @@ class optimization_problem_interface_t {
    */
   virtual bool is_equivalent(const optimization_problem_interface_t<i_t, f_t>& other) const = 0;
 
-  // ============================================================================
-  // Remote Execution (Polymorphic Dispatch)
-  // ============================================================================
-
-  /**
-   * @brief Solve LP problem using remote execution (polymorphic)
-   * This method dispatches to the appropriate solve_lp_remote overload based on
-   * the concrete type (GPU or CPU).
-   * @param[in] settings PDLP solver settings
-   * @return Pointer to solution interface
-   */
-  virtual std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
-    pdlp_solver_settings_t<i_t, f_t> const& settings,
-    bool problem_checking     = true,
-    bool use_pdlp_solver_mode = true) const = 0;
-
-  /**
-   * @brief Solve MIP problem using remote execution (polymorphic)
-   * This method dispatches to the appropriate solve_mip_remote overload based on
-   * the concrete type (GPU or CPU).
-   * @param[in] settings MIP solver settings
-   * @return Pointer to solution interface
-   */
-  virtual std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
-    mip_solver_settings_t<i_t, f_t> const& settings) const = 0;
-
   // ============================================================================
   // C API Support: Copy to Host (Polymorphic)
   // ============================================================================
@@ -440,377 +411,27 @@ class optimization_problem_interface_t {
    * @param[in] size Number of variables
    */
   virtual void copy_variable_types_to_host(var_t* output, i_t size) const = 0;
-};
 
-// ==============================================================================
-// GPU Implementation
-// ==============================================================================
-
-/**
- * @brief GPU-based implementation of optimization_problem_interface_t.
- *
- * This implementation stores all data in GPU memory using rmm::device_uvector.
- * It implements both device getters (returning rmm::device_uvector references)
- * and host getters (returning std::vector by copying from GPU to CPU).
- */
-template <typename i_t, typename f_t>
-class gpu_optimization_problem_t : public optimization_problem_interface_t<i_t, f_t> {
- public:
-  explicit gpu_optimization_problem_t(raft::handle_t const* handle_ptr);
-
-  // Setters
-  void set_maximize(bool maximize) override;
-  void set_csr_constraint_matrix(const f_t* A_values,
-                                 i_t size_values,
-                                 const i_t* A_indices,
-                                 i_t size_indices,
-                                 const i_t* A_offsets,
-                                 i_t size_offsets) override;
-  void set_constraint_bounds(const f_t* b, i_t size) override;
-  void set_objective_coefficients(const f_t* c, i_t size) override;
-  void set_objective_scaling_factor(f_t objective_scaling_factor) override;
-  void set_objective_offset(f_t objective_offset) override;
-  void set_quadratic_objective_matrix(const f_t* Q_values,
-                                      i_t size_values,
-                                      const i_t* Q_indices,
-                                      i_t size_indices,
-                                      const i_t* Q_offsets,
-                                      i_t size_offsets,
-                                      bool validate_positive_semi_definite = false) override;
-  void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size) override;
-  void set_variable_upper_bounds(const f_t* variable_upper_bounds, i_t size) override;
-  void set_variable_types(const var_t* variable_types, i_t size) override;
-  void set_problem_category(const problem_category_t& category) override;
-  void set_constraint_lower_bounds(const f_t* constraint_lower_bounds, i_t size) override;
-  void set_constraint_upper_bounds(const f_t* constraint_upper_bounds, i_t size) override;
-  void set_row_types(const char* row_types, i_t size) override;
-  void set_objective_name(const std::string& objective_name) override;
-  void set_problem_name(const std::string& problem_name) override;
-  void set_variable_names(const std::vector<std::string>& variable_names) override;
-  void set_row_names(const std::vector<std::string>& row_names) override;
-
-  // Device getters
-  i_t get_n_variables() const override;
-  i_t get_n_constraints() const override;
-  i_t get_nnz() const override;
-  i_t get_n_integers() const override;
-  const rmm::device_uvector<f_t>& get_constraint_matrix_values() const override;
-  rmm::device_uvector<f_t>& get_constraint_matrix_values() override;
-  const rmm::device_uvector<i_t>& get_constraint_matrix_indices() const override;
-  rmm::device_uvector<i_t>& get_constraint_matrix_indices() override;
-  const rmm::device_uvector<i_t>& get_constraint_matrix_offsets() const override;
-  rmm::device_uvector<i_t>& get_constraint_matrix_offsets() override;
-  const rmm::device_uvector<f_t>& get_constraint_bounds() const override;
-  rmm::device_uvector<f_t>& get_constraint_bounds() override;
-  const rmm::device_uvector<f_t>& get_objective_coefficients() const override;
-  rmm::device_uvector<f_t>& get_objective_coefficients() override;
-  f_t get_objective_scaling_factor() const override;
-  f_t get_objective_offset() const override;
-  const rmm::device_uvector<f_t>& get_variable_lower_bounds() const override;
-  rmm::device_uvector<f_t>& get_variable_lower_bounds() override;
-  const rmm::device_uvector<f_t>& get_variable_upper_bounds() const override;
-  rmm::device_uvector<f_t>& get_variable_upper_bounds() override;
-  const rmm::device_uvector<f_t>& get_constraint_lower_bounds() const override;
-  rmm::device_uvector<f_t>& get_constraint_lower_bounds() override;
-  const rmm::device_uvector<f_t>& get_constraint_upper_bounds() const override;
-  rmm::device_uvector<f_t>& get_constraint_upper_bounds() override;
-  const rmm::device_uvector<char>& get_row_types() const override;
-  const rmm::device_uvector<var_t>& get_variable_types() const override;
-  bool get_sense() const override;
-  bool empty() const override;
-  std::string get_objective_name() const override;
-  std::string get_problem_name() const override;
-  problem_category_t get_problem_category() const override;
-  const std::vector<std::string>& get_variable_names() const override;
-  const std::vector<std::string>& get_row_names() const override;
-  const std::vector<i_t>& get_quadratic_objective_offsets() const override;
-  const std::vector<i_t>& get_quadratic_objective_indices() const override;
-  const std::vector<f_t>& get_quadratic_objective_values() const override;
-  bool has_quadratic_objective() const override;
-
-  // Host getters
-  std::vector<f_t> get_constraint_matrix_values_host() const override;
-  std::vector<i_t> get_constraint_matrix_indices_host() const override;
-  std::vector<i_t> get_constraint_matrix_offsets_host() const override;
-  std::vector<f_t> get_constraint_bounds_host() const override;
-  std::vector<f_t> get_objective_coefficients_host() const override;
-  std::vector<f_t> get_variable_lower_bounds_host() const override;
-  std::vector<f_t> get_variable_upper_bounds_host() const override;
-  std::vector<f_t> get_constraint_lower_bounds_host() const override;
-  std::vector<f_t> get_constraint_upper_bounds_host() const override;
-  std::vector<char> get_row_types_host() const override;
-  std::vector<var_t> get_variable_types_host() const override;
-
-  /**
-   * @brief Convert this GPU optimization problem to an optimization_problem_t
-   *        by moving GPU data ownership (zero-copy transfer).
-   * @return optimization_problem_t with ownership of all GPU data
-   */
-  optimization_problem_t<i_t, f_t> to_optimization_problem();
-
-  /**
-   * @brief Write the optimization problem to an MPS file.
-   * @param[in] mps_file_path Path to the output MPS file
-   */
-  void write_to_mps(const std::string& mps_file_path) override;
+  // ============================================================================
+  // Conversion
+  // ============================================================================
 
   /**
-   * @brief Check if this problem is equivalent to another problem.
-   * @param[in] other The other optimization problem to compare against
-   * @return true if the problems are equivalent (up to permutation of variables/constraints)
-   */
-  bool is_equivalent(const optimization_problem_interface_t<i_t, f_t>& other) const override;
-
-  // Remote execution (polymorphic dispatch)
-  std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
-    pdlp_solver_settings_t<i_t, f_t> const& settings,
-    bool problem_checking     = true,
-    bool use_pdlp_solver_mode = true) const override;
-
-  std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
-    mip_solver_settings_t<i_t, f_t> const& settings) const override;
-
-  // C API support: Copy to host (polymorphic)
-  void copy_objective_coefficients_to_host(f_t* output, i_t size) const override;
-  void copy_constraint_matrix_to_host(f_t* values,
-                                      i_t* indices,
-                                      i_t* offsets,
-                                      i_t num_values,
-                                      i_t num_indices,
-                                      i_t num_offsets) const override;
-  void copy_row_types_to_host(char* output, i_t size) const override;
-  void copy_constraint_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_constraint_lower_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_constraint_upper_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_variable_lower_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_variable_upper_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_variable_types_to_host(var_t* output, i_t size) const override;
-
-  raft::handle_t const* get_handle_ptr() const noexcept;
-
- private:
-  raft::handle_t const* handle_ptr_{nullptr};
-  rmm::cuda_stream_view stream_view_;
-
-  problem_category_t problem_category_ = problem_category_t::LP;
-  bool maximize_{false};
-  i_t n_vars_{0};
-  i_t n_constraints_{0};
-
-  // GPU memory storage
-  rmm::device_uvector<f_t> A_;
-  rmm::device_uvector<i_t> A_indices_;
-  rmm::device_uvector<i_t> A_offsets_;
-  rmm::device_uvector<f_t> b_;
-  rmm::device_uvector<f_t> c_;
-  f_t objective_scaling_factor_{1};
-  f_t objective_offset_{0};
-
-  std::vector<i_t> Q_offsets_;
-  std::vector<i_t> Q_indices_;
-  std::vector<f_t> Q_values_;
-
-  rmm::device_uvector<f_t> variable_lower_bounds_;
-  rmm::device_uvector<f_t> variable_upper_bounds_;
-  rmm::device_uvector<f_t> constraint_lower_bounds_;
-  rmm::device_uvector<f_t> constraint_upper_bounds_;
-  rmm::device_uvector<char> row_types_;
-  rmm::device_uvector<var_t> variable_types_;
-
-  std::string objective_name_;
-  std::string problem_name_;
-  std::vector<std::string> var_names_{};
-  std::vector<std::string> row_names_{};
-};
-
-// ==============================================================================
-// CPU Implementation
-// ==============================================================================
-
-/**
- * @brief CPU-based implementation of optimization_problem_interface_t.
- *
- * This implementation stores all data in CPU memory using std::vector.
- * It only implements host getters (returning std::vector references).
- * Device getters throw exceptions as GPU memory access is not supported.
- */
-template <typename i_t, typename f_t>
-class cpu_optimization_problem_t : public optimization_problem_interface_t<i_t, f_t> {
- public:
-  explicit cpu_optimization_problem_t(raft::handle_t const* handle_ptr = nullptr);
-
-  // Setters
-  void set_maximize(bool maximize) override;
-  void set_csr_constraint_matrix(const f_t* A_values,
-                                 i_t size_values,
-                                 const i_t* A_indices,
-                                 i_t size_indices,
-                                 const i_t* A_offsets,
-                                 i_t size_offsets) override;
-  void set_constraint_bounds(const f_t* b, i_t size) override;
-  void set_objective_coefficients(const f_t* c, i_t size) override;
-  void set_objective_scaling_factor(f_t objective_scaling_factor) override;
-  void set_objective_offset(f_t objective_offset) override;
-  void set_quadratic_objective_matrix(const f_t* Q_values,
-                                      i_t size_values,
-                                      const i_t* Q_indices,
-                                      i_t size_indices,
-                                      const i_t* Q_offsets,
-                                      i_t size_offsets,
-                                      bool validate_positive_semi_definite = false) override;
-  void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size) override;
-  void set_variable_upper_bounds(const f_t* variable_upper_bounds, i_t size) override;
-  void set_variable_types(const var_t* variable_types, i_t size) override;
-  void set_problem_category(const problem_category_t& category) override;
-  void set_constraint_lower_bounds(const f_t* constraint_lower_bounds, i_t size) override;
-  void set_constraint_upper_bounds(const f_t* constraint_upper_bounds, i_t size) override;
-  void set_row_types(const char* row_types, i_t size) override;
-  void set_objective_name(const std::string& objective_name) override;
-  void set_problem_name(const std::string& problem_name) override;
-  void set_variable_names(const std::vector<std::string>& variable_names) override;
-  void set_row_names(const std::vector<std::string>& row_names) override;
-
-  // Scalar and metadata getters (implemented for CPU; do not throw)
-  i_t get_n_variables() const override;
-  i_t get_n_constraints() const override;
-  i_t get_nnz() const override;
-  i_t get_n_integers() const override;
-  f_t get_objective_scaling_factor() const override;
-  f_t get_objective_offset() const override;
-  bool get_sense() const override;
-  bool empty() const override;
-  std::string get_objective_name() const override;
-  std::string get_problem_name() const override;
-  problem_category_t get_problem_category() const override;
-  const std::vector<std::string>& get_variable_names() const override;
-  const std::vector<std::string>& get_row_names() const override;
-  const std::vector<i_t>& get_quadratic_objective_offsets() const override;
-  const std::vector<i_t>& get_quadratic_objective_indices() const override;
-  const std::vector<f_t>& get_quadratic_objective_values() const override;
-  bool has_quadratic_objective() const override;
-
-  // Device memory accessors - throw on CPU implementation (use host getters below instead)
-  const rmm::device_uvector<f_t>& get_constraint_matrix_values() const override;
-  rmm::device_uvector<f_t>& get_constraint_matrix_values() override;
-  const rmm::device_uvector<i_t>& get_constraint_matrix_indices() const override;
-  rmm::device_uvector<i_t>& get_constraint_matrix_indices() override;
-  const rmm::device_uvector<i_t>& get_constraint_matrix_offsets() const override;
-  rmm::device_uvector<i_t>& get_constraint_matrix_offsets() override;
-  const rmm::device_uvector<f_t>& get_constraint_bounds() const override;
-  rmm::device_uvector<f_t>& get_constraint_bounds() override;
-  const rmm::device_uvector<f_t>& get_objective_coefficients() const override;
-  rmm::device_uvector<f_t>& get_objective_coefficients() override;
-  const rmm::device_uvector<f_t>& get_variable_lower_bounds() const override;
-  rmm::device_uvector<f_t>& get_variable_lower_bounds() override;
-  const rmm::device_uvector<f_t>& get_variable_upper_bounds() const override;
-  rmm::device_uvector<f_t>& get_variable_upper_bounds() override;
-  const rmm::device_uvector<f_t>& get_constraint_lower_bounds() const override;
-  rmm::device_uvector<f_t>& get_constraint_lower_bounds() override;
-  const rmm::device_uvector<f_t>& get_constraint_upper_bounds() const override;
-  rmm::device_uvector<f_t>& get_constraint_upper_bounds() override;
-  const rmm::device_uvector<char>& get_row_types() const override;
-  const rmm::device_uvector<var_t>& get_variable_types() const override;
-
-  // Host getters - these are the only supported getters for CPU implementation
-  std::vector<f_t> get_constraint_matrix_values_host() const override;
-  std::vector<i_t> get_constraint_matrix_indices_host() const override;
-  std::vector<i_t> get_constraint_matrix_offsets_host() const override;
-  std::vector<f_t> get_constraint_bounds_host() const override;
-  std::vector<f_t> get_objective_coefficients_host() const override;
-  std::vector<f_t> get_variable_lower_bounds_host() const override;
-  std::vector<f_t> get_variable_upper_bounds_host() const override;
-  std::vector<f_t> get_constraint_lower_bounds_host() const override;
-  std::vector<f_t> get_constraint_upper_bounds_host() const override;
-  std::vector<char> get_row_types_host() const override;
-  std::vector<var_t> get_variable_types_host() const override;
-
-  /**
-   * @brief Set the CUDA handle for GPU operations
+   * @brief Convert to a GPU-backed optimization_problem_t.
    *
-   * This is used in test mode when a CPU problem needs to be converted to GPU
-   * for local solving. The handle must be set before calling to_optimization_problem().
+   * For optimization_problem_t (GPU): returns nullptr (already is one).
+   * For cpu_optimization_problem_t: creates new GPU problem, copies data, returns owned pointer.
    *
-   * @param handle_ptr Pointer to the RAFT handle with CUDA resources
-   */
-  void set_handle(raft::handle_t const* handle_ptr) { handle_ptr_ = handle_ptr; }
-
-  /**
-   * @brief Convert this CPU optimization problem to an optimization_problem_t
-   *        by copying CPU data to GPU (requires GPU memory transfer).
-   * @return optimization_problem_t with all data copied to GPU
-   */
-  optimization_problem_t<i_t, f_t> to_optimization_problem();
-
-  /**
-   * @brief Write the optimization problem to an MPS file.
-   * @param[in] mps_file_path Path to the output MPS file
-   */
-  void write_to_mps(const std::string& mps_file_path) override;
-
-  /**
-   * @brief Check if this problem is equivalent to another problem.
-   * @param[in] other The other optimization problem to compare against
-   * @return true if the problems are equivalent (up to permutation of variables/constraints)
+   * Usage pattern:
+   *   auto temp = problem_interface->to_optimization_problem(&handle);
+   *   auto& op = temp ? *temp : static_cast<optimization_problem_t&>(*problem_interface);
+   *
+   * @param handle_ptr RAFT handle with CUDA resources for GPU memory allocation.
+   *                   Required for CPU->GPU conversion. Ignored for GPU problems.
+   * @return unique_ptr to new GPU problem, or nullptr if already a GPU problem
    */
-  bool is_equivalent(const optimization_problem_interface_t<i_t, f_t>& other) const override;
-
-  // Remote execution (polymorphic dispatch)
-  std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
-    pdlp_solver_settings_t<i_t, f_t> const& settings,
-    bool problem_checking     = true,
-    bool use_pdlp_solver_mode = true) const override;
-
-  std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
-    mip_solver_settings_t<i_t, f_t> const& settings) const override;
-
-  // C API support: Copy to host (polymorphic)
-  void copy_objective_coefficients_to_host(f_t* output, i_t size) const override;
-  void copy_constraint_matrix_to_host(f_t* values,
-                                      i_t* indices,
-                                      i_t* offsets,
-                                      i_t num_values,
-                                      i_t num_indices,
-                                      i_t num_offsets) const override;
-  void copy_row_types_to_host(char* output, i_t size) const override;
-  void copy_constraint_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_constraint_lower_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_constraint_upper_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_variable_lower_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_variable_upper_bounds_to_host(f_t* output, i_t size) const override;
-  void copy_variable_types_to_host(var_t* output, i_t size) const override;
-
- private:
-  raft::handle_t const* handle_ptr_{nullptr};
-
-  problem_category_t problem_category_ = problem_category_t::LP;
-  bool maximize_{false};
-  i_t n_vars_{0};
-  i_t n_constraints_{0};
-
-  // CPU memory storage
-  std::vector<f_t> A_;
-  std::vector<i_t> A_indices_;
-  std::vector<i_t> A_offsets_;
-  std::vector<f_t> b_;
-  std::vector<f_t> c_;
-  f_t objective_scaling_factor_{1};
-  f_t objective_offset_{0};
-
-  std::vector<i_t> Q_offsets_;
-  std::vector<i_t> Q_indices_;
-  std::vector<f_t> Q_values_;
-
-  std::vector<f_t> variable_lower_bounds_;
-  std::vector<f_t> variable_upper_bounds_;
-  std::vector<f_t> constraint_lower_bounds_;
-  std::vector<f_t> constraint_upper_bounds_;
-  std::vector<char> row_types_;
-  std::vector<var_t> variable_types_;
-
-  std::string objective_name_;
-  std::string problem_name_;
-  std::vector<std::string> var_names_{};
-  std::vector<std::string> row_names_{};
+  virtual std::unique_ptr<optimization_problem_t<i_t, f_t>> to_optimization_problem(
+    raft::handle_t const* handle_ptr = nullptr) = 0;
 };
 
 }  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/gpu_optimization_problem_solution.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_solution.hpp
similarity index 97%
rename from cpp/include/cuopt/linear_programming/gpu_optimization_problem_solution.hpp
rename to cpp/include/cuopt/linear_programming/optimization_problem_solution.hpp
index bbb56554f0..fde092c2b6 100644
--- a/cpp/include/cuopt/linear_programming/gpu_optimization_problem_solution.hpp
+++ b/cpp/include/cuopt/linear_programming/optimization_problem_solution.hpp
@@ -311,12 +311,6 @@ class gpu_lp_solution_t : public lp_solution_interface_t<i_t, f_t> {
       .iterations_since_last_restart_;
   }
 
-  optimization_problem_solution_t<i_t, f_t> to_gpu_solution(rmm::cuda_stream_view) override
-  {
-    throw cuopt::logic_error("to_gpu_solution called on already GPU-backed solution",
-                             error_type_t::ValidationError);
-  }
-
   /**
    * @brief Convert GPU solution to CPU solution
    * Copies data from device to host for test mode or CPU-only environments.
@@ -443,12 +437,6 @@ class gpu_mip_solution_t : public mip_solution_interface_t<i_t, f_t> {
 
   i_t get_num_simplex_iterations() const override { return solution_.get_num_simplex_iterations(); }
 
-  mip_solution_t<i_t, f_t> to_gpu_solution(rmm::cuda_stream_view) override
-  {
-    throw cuopt::logic_error("to_gpu_solution called on already GPU-backed MIP solution",
-                             error_type_t::ValidationError);
-  }
-
   /**
    * @brief Convert GPU MIP solution to CPU MIP solution
    * Copies data from device to host for test mode or CPU-only environments.
diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp
index 379da05105..70b234d33f 100644
--- a/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp
+++ b/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp
@@ -34,7 +34,7 @@ class mip_solution_t;
  * @brief Abstract interface for optimization problem solutions (LP and MIP)
  *
  * This interface allows for CPU or GPU-backed solution storage.
- * - gpu_optimization_problem_solution_t: Uses rmm::device_uvector (GPU memory)
+ * - optimization_problem_solution_t: Uses rmm::device_uvector (GPU memory)
  * - cpu_optimization_problem_solution_t: Uses std::vector (CPU/host memory)
  *
  * @tparam i_t Integer type for indices
@@ -329,15 +329,6 @@ class lp_solution_interface_t : public optimization_problem_solution_interface_t
   virtual f_t get_sum_solution_weight() const                                        = 0;
   virtual i_t get_iterations_since_last_restart() const                              = 0;
 
-  /**
-   * @brief Convert to optimization_problem_solution_t (GPU-backed)
-   * This is used for remote execution: CPU solution -> GPU solution for return
-   * @param stream_view CUDA stream for device allocations
-   * @return GPU-backed solution
-   */
-  virtual optimization_problem_solution_t<i_t, f_t> to_gpu_solution(
-    rmm::cuda_stream_view stream_view) = 0;
-
   /**
    * @brief Convert to Python/Cython return type (polymorphic version)
    * This method allows backend-agnostic conversion to Python return structs.
@@ -477,14 +468,6 @@ class mip_solution_interface_t : public optimization_problem_solution_interface_
    */
   virtual i_t get_num_simplex_iterations() const = 0;
 
-  /**
-   * @brief Convert to mip_solution_t (GPU-backed)
-   * This is used for remote execution: CPU solution -> GPU solution for return
-   * @param stream_view CUDA stream for device allocations
-   * @return GPU-backed solution
-   */
-  virtual mip_solution_t<i_t, f_t> to_gpu_solution(rmm::cuda_stream_view stream_view) = 0;
-
   /**
    * @brief Convert to Python/Cython return type (polymorphic version)
    * This method allows backend-agnostic conversion to Python return structs.
diff --git a/cpp/include/cuopt/linear_programming/solve.hpp b/cpp/include/cuopt/linear_programming/solve.hpp
index 5b621062f1..e033d99c72 100644
--- a/cpp/include/cuopt/linear_programming/solve.hpp
+++ b/cpp/include/cuopt/linear_programming/solve.hpp
@@ -7,10 +7,10 @@
 
 #pragma once
 
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/mip/solver_settings.hpp>
 #include <cuopt/linear_programming/mip/solver_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/optimization_problem_solution_interface.hpp>
 #include <cuopt/linear_programming/pdlp/solver_settings.hpp>
 #include <cuopt/linear_programming/pdlp/solver_solution.hpp>
@@ -148,6 +148,23 @@ optimization_problem_t<i_t, f_t> mps_data_model_to_optimization_problem(
   raft::handle_t const* handle_ptr,
   const cuopt::mps_parser::mps_data_model_t<i_t, f_t>& data_model);
 
+// ============================================================================
+// CPU problem overloads (convert to GPU, solve, convert solution back)
+// ============================================================================
+
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp(
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
+  pdlp_solver_settings_t<i_t, f_t> const& settings = pdlp_solver_settings_t<i_t, f_t>{},
+  bool problem_checking                            = true,
+  bool use_pdlp_solver_mode                        = true,
+  bool is_batch_mode                               = false);
+
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip(
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
+  mip_solver_settings_t<i_t, f_t> const& settings = mip_solver_settings_t<i_t, f_t>{});
+
 // ============================================================================
 // New overloads for optimization_problem_interface_t with remote execution
 // ============================================================================
diff --git a/cpp/include/cuopt/linear_programming/solve_remote.hpp b/cpp/include/cuopt/linear_programming/solve_remote.hpp
index f46cb125af..f8f77b31e5 100644
--- a/cpp/include/cuopt/linear_programming/solve_remote.hpp
+++ b/cpp/include/cuopt/linear_programming/solve_remote.hpp
@@ -14,13 +14,10 @@
 
 namespace cuopt::linear_programming {
 
-// Forward declarations for optimization problems (only declaration needed, not definition)
+// Forward declarations (only declaration needed, not definition)
 template <typename i_t, typename f_t>
 class cpu_optimization_problem_t;
 
-template <typename i_t, typename f_t>
-class gpu_optimization_problem_t;
-
 template <typename i_t, typename f_t>
 struct pdlp_solver_settings_t;
 
@@ -41,16 +38,6 @@ std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
   bool problem_checking     = true,
   bool use_pdlp_solver_mode = true);
 
-/**
- * @brief Solve LP problem remotely (GPU backend)
- */
-template <typename i_t, typename f_t>
-std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
-  gpu_optimization_problem_t<i_t, f_t> const& gpu_problem,
-  pdlp_solver_settings_t<i_t, f_t> const& settings,
-  bool problem_checking     = true,
-  bool use_pdlp_solver_mode = true);
-
 /**
  * @brief Solve MIP problem remotely (CPU backend)
  */
@@ -59,12 +46,4 @@ std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
   cpu_optimization_problem_t<i_t, f_t> const& cpu_problem,
   mip_solver_settings_t<i_t, f_t> const& settings);
 
-/**
- * @brief Solve MIP problem remotely (GPU backend)
- */
-template <typename i_t, typename f_t>
-std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
-  gpu_optimization_problem_t<i_t, f_t> const& gpu_problem,
-  mip_solver_settings_t<i_t, f_t> const& settings);
-
 }  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp
index 81a7cb473c..47116cd95a 100644
--- a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp
+++ b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp
@@ -7,7 +7,6 @@
 
 #pragma once
 
-#include <cuopt/linear_programming/optimization_problem.hpp>
 #include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/optimization_problem_solution_interface.hpp>
 #include <cuopt/linear_programming/solver_settings.hpp>
diff --git a/cpp/src/mip_heuristics/problem/host_helper.cuh b/cpp/src/mip_heuristics/problem/host_helper.cuh
index 8327c681c7..00015282e9 100644
--- a/cpp/src/mip_heuristics/problem/host_helper.cuh
+++ b/cpp/src/mip_heuristics/problem/host_helper.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -10,7 +10,7 @@
 #include <utilities/copy_helpers.hpp>
 #include <utilities/macros.cuh>
 
-#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 
 #include <vector>
 
diff --git a/cpp/src/mip_heuristics/problem/load_balanced_problem.cuh b/cpp/src/mip_heuristics/problem/load_balanced_problem.cuh
index a8fb088271..85c54a622e 100644
--- a/cpp/src/mip_heuristics/problem/load_balanced_problem.cuh
+++ b/cpp/src/mip_heuristics/problem/load_balanced_problem.cuh
@@ -11,7 +11,7 @@
 // FOR COMPILE TIME, WE SHOULD KEEP THE INCLUDES ON THIS HEADER MINIMAL
 
 #include <cuopt/linear_programming/mip/solver_settings.hpp>
-#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include "host_helper.cuh"
 
 #include <utilities/macros.cuh>
diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu
index d5e9464e62..f5a2172f2e 100644
--- a/cpp/src/mip_heuristics/solve.cu
+++ b/cpp/src/mip_heuristics/solve.cu
@@ -6,6 +6,7 @@
 /* clang-format on */
 
 #include <cuopt/error.hpp>
+#include <cuopt/linear_programming/solve_remote.hpp>
 
 #include <mip_heuristics/mip_constants.hpp>
 #include <mip_heuristics/presolve/third_party_presolve.hpp>
@@ -25,10 +26,11 @@
 #include <utilities/version_info.hpp>
 
 #include <cuopt/linear_programming/backend_selection.hpp>
-#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/mip/solver_settings.hpp>
 #include <cuopt/linear_programming/mip/solver_solution.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh>
 #include <cuopt/linear_programming/solve.hpp>
 #include <cuopt/linear_programming/utilities/internals.hpp>
@@ -377,64 +379,77 @@ mip_solution_t<i_t, f_t> solve_mip(
   return solve_mip(op_problem, settings);
 }
 
-/**
- * @brief Solve MIP using polymorphic problem interface
- *
- * This overload accepts the abstract optimization_problem_interface_t, allowing
- * both GPU and CPU-backed problems. Handles remote execution and test mode.
- */
+// ============================================================================
+// CPU problem overload (convert to GPU, solve, convert solution back)
+// ============================================================================
+
 template <typename i_t, typename f_t>
 std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip(
-  optimization_problem_interface_t<i_t, f_t>* problem_interface,
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
   mip_solver_settings_t<i_t, f_t> const& settings)
 {
-  try {
-    // Check if remote execution is enabled
-    if (is_remote_execution_enabled()) {
-      CUOPT_LOG_INFO("Remote MIP solve requested");
-      return problem_interface->solve_mip_remote(settings);
-    } else {
-      // Local execution - convert to optimization_problem_t and call original solve_mip
-      CUOPT_LOG_INFO("Local MIP solve");
+  CUOPT_LOG_INFO("solve_mip (CPU problem) - converting to GPU for local solve");
 
-      // Check if this is a CPU problem (test mode: CUOPT_USE_CPU_MEM_FOR_LOCAL=true)
-      auto* cpu_prob = dynamic_cast<cpu_optimization_problem_t<i_t, f_t>*>(problem_interface);
-      if (cpu_prob != nullptr) {
-        CUOPT_LOG_INFO("Test mode: Converting CPU problem to GPU for local MIP solve");
-
-        // Create CUDA resources for the conversion
-        rmm::cuda_stream stream;
-        raft::handle_t handle(stream);
+  // Create CUDA resources for the conversion
+  rmm::cuda_stream stream;
+  raft::handle_t handle(stream);
 
-        // Temporarily set the handle on the CPU problem so it can create GPU resources
-        cpu_prob->set_handle(&handle);
+  // Convert CPU problem to GPU problem
+  auto gpu_problem = cpu_problem.to_optimization_problem(&handle);
 
-        // Convert CPU problem to GPU problem
-        auto op_problem = cpu_prob->to_optimization_problem();
+  // Synchronize before solving to ensure conversion is complete
+  stream.synchronize();
 
-        // Clear the handle to avoid dangling pointer after this scope
-        cpu_prob->set_handle(nullptr);
+  // Solve on GPU
+  auto gpu_solution = solve_mip<i_t, f_t>(*gpu_problem, settings);
 
-        // Synchronize before solving to ensure conversion is complete
-        stream.synchronize();
+  // Ensure all GPU work from the solve is complete before D2H copies in to_cpu_solution(),
+  // which uses rmm::cuda_stream_per_thread (a different stream than the solver used).
+  stream.synchronize();
 
-        // Solve on GPU
-        auto gpu_solution = solve_mip<i_t, f_t>(op_problem, settings);
+  // Convert GPU solution back to CPU
+  gpu_mip_solution_t<i_t, f_t> gpu_sol_interface(std::move(gpu_solution));
+  return gpu_sol_interface.to_cpu_solution();
+}
 
-        // Ensure all GPU work from the solve is complete before to_cpu_solution() D2H copies.
-        stream.synchronize();
+// ============================================================================
+// Interface-based solve overload with remote execution support
+// ============================================================================
 
-        CUOPT_LOG_INFO("Test mode: Converting GPU solution back to CPU solution");
-        gpu_mip_solution_t<i_t, f_t> gpu_sol_interface(std::move(gpu_solution));
-        return gpu_sol_interface.to_cpu_solution();
-      }
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip(
+  optimization_problem_interface_t<i_t, f_t>* problem_interface,
+  mip_solver_settings_t<i_t, f_t> const& settings)
+{
+  cuopt_expects(problem_interface != nullptr,
+                error_type_t::ValidationError,
+                "problem_interface cannot be null");
 
-      auto op_problem   = problem_interface->to_optimization_problem();
-      auto gpu_solution = solve_mip<i_t, f_t>(op_problem, settings);
+  try {
+    // Remote execution always uses the CPU memory backend, so a CPU-backed problem is required
+    if (is_remote_execution_enabled()) {
+      auto* cpu_prob = dynamic_cast<cpu_optimization_problem_t<i_t, f_t>*>(problem_interface);
+      cuopt_expects(cpu_prob != nullptr,
+                    error_type_t::ValidationError,
+                    "Remote execution requires CPU memory backend");
+      CUOPT_LOG_INFO("Remote MIP solve requested");
+      return solve_mip_remote(*cpu_prob, settings);
+    }
 
-      // Wrap GPU solution in interface and return
-      return std::make_unique<gpu_mip_solution_t<i_t, f_t>>(std::move(gpu_solution));
+    // Local execution - dispatch to appropriate overload based on problem type
+    auto* cpu_prob = dynamic_cast<cpu_optimization_problem_t<i_t, f_t>*>(problem_interface);
+    if (cpu_prob != nullptr) {
+      // CPU problem: use CPU overload (converts to GPU, solves, converts solution back)
+      return solve_mip(*cpu_prob, settings);
     }
+
+    // GPU problem: call GPU solver directly
+    auto* gpu_prob = dynamic_cast<optimization_problem_t<i_t, f_t>*>(problem_interface);
+    cuopt_expects(gpu_prob != nullptr,
+                  error_type_t::ValidationError,
+                  "problem_interface must be either a CPU or GPU optimization problem");
+    auto gpu_solution = solve_mip<i_t, f_t>(*gpu_prob, settings);
+    return std::make_unique<gpu_mip_solution_t<i_t, f_t>>(std::move(gpu_solution));
   } catch (const cuopt::logic_error& e) {
     CUOPT_LOG_ERROR("Error in solve_mip (interface): %s", e.what());
     throw;
@@ -444,17 +459,20 @@ std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip(
   }
 }
 
-#define INSTANTIATE(F_TYPE)                                                  \
-  template mip_solution_t<int, F_TYPE> solve_mip(                            \
-    optimization_problem_t<int, F_TYPE>& op_problem,                         \
-    mip_solver_settings_t<int, F_TYPE> const& settings);                     \
-                                                                             \
-  template mip_solution_t<int, F_TYPE> solve_mip(                            \
-    raft::handle_t const* handle_ptr,                                        \
-    const cuopt::mps_parser::mps_data_model_t<int, F_TYPE>& mps_data_model,  \
-    mip_solver_settings_t<int, F_TYPE> const& settings);                     \
-                                                                             \
-  template std::unique_ptr<mip_solution_interface_t<int, F_TYPE>> solve_mip( \
+#define INSTANTIATE(F_TYPE)                                                               \
+  template mip_solution_t<int, F_TYPE> solve_mip(                                         \
+    optimization_problem_t<int, F_TYPE>& op_problem,                                      \
+    mip_solver_settings_t<int, F_TYPE> const& settings);                                  \
+                                                                                          \
+  template mip_solution_t<int, F_TYPE> solve_mip(                                         \
+    raft::handle_t const* handle_ptr,                                                     \
+    const cuopt::mps_parser::mps_data_model_t<int, F_TYPE>& mps_data_model,               \
+    mip_solver_settings_t<int, F_TYPE> const& settings);                                  \
+                                                                                          \
+  template std::unique_ptr<mip_solution_interface_t<int, F_TYPE>> solve_mip(              \
+    cpu_optimization_problem_t<int, F_TYPE>&, mip_solver_settings_t<int, F_TYPE> const&); \
+                                                                                          \
+  template std::unique_ptr<mip_solution_interface_t<int, F_TYPE>> solve_mip(              \
     optimization_problem_interface_t<int, F_TYPE>*, mip_solver_settings_t<int, F_TYPE> const&);
 
 #if MIP_INSTANTIATE_FLOAT
diff --git a/cpp/src/pdlp/CMakeLists.txt b/cpp/src/pdlp/CMakeLists.txt
index ca76727efa..30fc3cd3ff 100644
--- a/cpp/src/pdlp/CMakeLists.txt
+++ b/cpp/src/pdlp/CMakeLists.txt
@@ -7,7 +7,6 @@
 set(LP_CORE_FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/optimization_problem.cu
-  ${CMAKE_CURRENT_SOURCE_DIR}/gpu_optimization_problem.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/cpu_optimization_problem.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/backend_selection.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/utilities/problem_checking.cu
diff --git a/cpp/src/pdlp/backend_selection.cpp b/cpp/src/pdlp/backend_selection.cpp
index 2f37443537..4f2dc62bcf 100644
--- a/cpp/src/pdlp/backend_selection.cpp
+++ b/cpp/src/pdlp/backend_selection.cpp
@@ -28,18 +28,6 @@ execution_mode_t get_execution_mode()
   return is_remote_execution_enabled() ? execution_mode_t::REMOTE : execution_mode_t::LOCAL;
 }
 
-bool use_gpu_memory_for_remote()
-{
-  const char* use_gpu_mem = std::getenv("CUOPT_USE_GPU_MEM_FOR_REMOTE");
-  if (use_gpu_mem != nullptr) {
-    std::string value(use_gpu_mem);
-    // Convert to lowercase for case-insensitive comparison
-    std::transform(value.begin(), value.end(), value.begin(), ::tolower);
-    return (value == "true" || value == "1");
-  }
-  return false;
-}
-
 bool use_cpu_memory_for_local()
 {
   const char* use_cpu_mem = std::getenv("CUOPT_USE_CPU_MEM_FOR_LOCAL");
@@ -54,12 +42,9 @@ bool use_cpu_memory_for_local()
 
 memory_backend_t get_memory_backend_type()
 {
-  if (get_execution_mode() == execution_mode_t::LOCAL) {
-    // Local execution: GPU memory by default, CPU if test mode enabled
-    return use_cpu_memory_for_local() ? memory_backend_t::CPU : memory_backend_t::GPU;
-  }
-  // Remote execution: CPU memory by default, GPU if explicitly requested
-  return use_gpu_memory_for_remote() ? memory_backend_t::GPU : memory_backend_t::CPU;
+  if (get_execution_mode() == execution_mode_t::REMOTE) { return memory_backend_t::CPU; }
+  // Local execution: GPU memory by default, CPU if CUOPT_USE_CPU_MEM_FOR_LOCAL is set
+  return use_cpu_memory_for_local() ? memory_backend_t::CPU : memory_backend_t::GPU;
 }
 
 }  // namespace cuopt::linear_programming
diff --git a/cpp/src/pdlp/cpu_optimization_problem.cpp b/cpp/src/pdlp/cpu_optimization_problem.cpp
index 012dbff930..9a30c31ef2 100644
--- a/cpp/src/pdlp/cpu_optimization_problem.cpp
+++ b/cpp/src/pdlp/cpu_optimization_problem.cpp
@@ -6,8 +6,9 @@
 /* clang-format on */
 
 #include <cuopt/error.hpp>
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/csr_matrix_utils.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem.hpp>
 #include <cuopt/linear_programming/solve_remote.hpp>
 
 #include <mip_heuristics/mip_constants.hpp>
@@ -27,8 +28,7 @@ namespace cuopt::linear_programming {
 // ==============================================================================
 
 template <typename i_t, typename f_t>
-cpu_optimization_problem_t<i_t, f_t>::cpu_optimization_problem_t(raft::handle_t const* handle_ptr)
-  : handle_ptr_(handle_ptr)
+cpu_optimization_problem_t<i_t, f_t>::cpu_optimization_problem_t()  // takes no RAFT handle
 {
   CUOPT_LOG_DEBUG("cpu_optimization_problem_t constructor: Using CPU backend");
 }
@@ -568,84 +568,87 @@ std::vector<var_t> cpu_optimization_problem_t<i_t, f_t>::get_variable_types_host
 // ==============================================================================
 
 template <typename i_t, typename f_t>
-optimization_problem_t<i_t, f_t> cpu_optimization_problem_t<i_t, f_t>::to_optimization_problem()
+std::unique_ptr<optimization_problem_t<i_t, f_t>>
+cpu_optimization_problem_t<i_t, f_t>::to_optimization_problem(raft::handle_t const* handle_ptr)
 {
-  if (handle_ptr_ == nullptr) {
+  if (handle_ptr == nullptr) {  // the GPU-backed problem below is built on this handle
     throw std::runtime_error(
       "cpu_optimization_problem_t::to_optimization_problem(): "
       "handle_ptr is null. A RAFT handle with CUDA resources is required to convert "
       "a CPU-backed problem to a GPU-backed optimization_problem_t.");
   }
 
-  optimization_problem_t<i_t, f_t> problem(handle_ptr_);
+  auto gpu_problem = std::make_unique<optimization_problem_t<i_t, f_t>>(handle_ptr);
 
   // Set scalar values
-  problem.set_maximize(maximize_);
-  problem.set_objective_scaling_factor(objective_scaling_factor_);
-  problem.set_objective_offset(objective_offset_);
-  problem.set_problem_category(problem_category_);
+  gpu_problem->set_maximize(maximize_);
+  gpu_problem->set_objective_scaling_factor(objective_scaling_factor_);
+  gpu_problem->set_objective_offset(objective_offset_);
+  gpu_problem->set_problem_category(problem_category_);
 
   // Set string values
-  if (!objective_name_.empty()) problem.set_objective_name(objective_name_);
-  if (!problem_name_.empty()) problem.set_problem_name(problem_name_);
-  if (!var_names_.empty()) problem.set_variable_names(var_names_);
-  if (!row_names_.empty()) problem.set_row_names(row_names_);
+  if (!objective_name_.empty()) gpu_problem->set_objective_name(objective_name_);
+  if (!problem_name_.empty()) gpu_problem->set_problem_name(problem_name_);
+  if (!var_names_.empty()) gpu_problem->set_variable_names(var_names_);
+  if (!row_names_.empty()) gpu_problem->set_row_names(row_names_);
 
   // Set CSR constraint matrix (data will be copied to GPU by optimization_problem_t setters)
   // Use A_offsets_ presence as the guard: a valid CSR can have zero non-zeros but still
   // needs row offsets to define the number of constraints.
   if (!A_offsets_.empty()) {
-    problem.set_csr_constraint_matrix(A_.data(),
-                                      A_.size(),
-                                      A_indices_.data(),
-                                      A_indices_.size(),
-                                      A_offsets_.data(),
-                                      A_offsets_.size());
+    gpu_problem->set_csr_constraint_matrix(A_.data(),
+                                           A_.size(),
+                                           A_indices_.data(),
+                                           A_indices_.size(),
+                                           A_offsets_.data(),
+                                           A_offsets_.size());
   }
 
   // Set constraint bounds
-  if (!b_.empty()) { problem.set_constraint_bounds(b_.data(), b_.size()); }
+  if (!b_.empty()) { gpu_problem->set_constraint_bounds(b_.data(), b_.size()); }
 
   // Set objective coefficients
-  if (!c_.empty()) { problem.set_objective_coefficients(c_.data(), c_.size()); }
+  if (!c_.empty()) { gpu_problem->set_objective_coefficients(c_.data(), c_.size()); }
 
   // Set quadratic objective if present
   if (!Q_values_.empty()) {
-    problem.set_quadratic_objective_matrix(Q_values_.data(),
-                                           Q_values_.size(),
-                                           Q_indices_.data(),
-                                           Q_indices_.size(),
-                                           Q_offsets_.data(),
-                                           Q_offsets_.size());
+    gpu_problem->set_quadratic_objective_matrix(Q_values_.data(),
+                                                Q_values_.size(),
+                                                Q_indices_.data(),
+                                                Q_indices_.size(),
+                                                Q_offsets_.data(),
+                                                Q_offsets_.size());
   }
 
   // Set variable bounds
   if (!variable_lower_bounds_.empty()) {
-    problem.set_variable_lower_bounds(variable_lower_bounds_.data(), variable_lower_bounds_.size());
+    gpu_problem->set_variable_lower_bounds(variable_lower_bounds_.data(),
+                                           variable_lower_bounds_.size());
   }
   if (!variable_upper_bounds_.empty()) {
-    problem.set_variable_upper_bounds(variable_upper_bounds_.data(), variable_upper_bounds_.size());
+    gpu_problem->set_variable_upper_bounds(variable_upper_bounds_.data(),
+                                           variable_upper_bounds_.size());
   }
 
   // Set variable types
   if (!variable_types_.empty()) {
-    problem.set_variable_types(variable_types_.data(), variable_types_.size());
+    gpu_problem->set_variable_types(variable_types_.data(), variable_types_.size());
   }
 
   // Set constraint bounds
   if (!constraint_lower_bounds_.empty()) {
-    problem.set_constraint_lower_bounds(constraint_lower_bounds_.data(),
-                                        constraint_lower_bounds_.size());
+    gpu_problem->set_constraint_lower_bounds(constraint_lower_bounds_.data(),
+                                             constraint_lower_bounds_.size());
   }
   if (!constraint_upper_bounds_.empty()) {
-    problem.set_constraint_upper_bounds(constraint_upper_bounds_.data(),
-                                        constraint_upper_bounds_.size());
+    gpu_problem->set_constraint_upper_bounds(constraint_upper_bounds_.data(),
+                                             constraint_upper_bounds_.size());
   }
 
   // Set row types
-  if (!row_types_.empty()) { problem.set_row_types(row_types_.data(), row_types_.size()); }
+  if (!row_types_.empty()) { gpu_problem->set_row_types(row_types_.data(), row_types_.size()); }
 
-  return problem;
+  return gpu_problem;  // unique_ptr return: ownership moves to the caller
 }
 
 // ==============================================================================
@@ -905,29 +908,6 @@ bool cpu_optimization_problem_t<i_t, f_t>::is_equivalent(
   return true;
 }
 
-// ==============================================================================
-// Remote Execution (Polymorphic Dispatch)
-// ==============================================================================
-
-template <typename i_t, typename f_t>
-std::unique_ptr<lp_solution_interface_t<i_t, f_t>>
-cpu_optimization_problem_t<i_t, f_t>::solve_lp_remote(
-  pdlp_solver_settings_t<i_t, f_t> const& settings,
-  bool problem_checking,
-  bool use_pdlp_solver_mode) const
-{
-  return ::cuopt::linear_programming::solve_lp_remote(
-    *this, settings, problem_checking, use_pdlp_solver_mode);
-}
-
-template <typename i_t, typename f_t>
-std::unique_ptr<mip_solution_interface_t<i_t, f_t>>
-cpu_optimization_problem_t<i_t, f_t>::solve_mip_remote(
-  mip_solver_settings_t<i_t, f_t> const& settings) const
-{
-  return ::cuopt::linear_programming::solve_mip_remote(*this, settings);
-}
-
 // ==============================================================================
 // C API Support: Copy to Host (CPU Implementation)
 // ==============================================================================
diff --git a/cpp/src/pdlp/cuopt_c.cpp b/cpp/src/pdlp/cuopt_c.cpp
index d1b6f66272..ed2eab02f2 100644
--- a/cpp/src/pdlp/cuopt_c.cpp
+++ b/cpp/src/pdlp/cuopt_c.cpp
@@ -8,8 +8,8 @@
 #include <cuopt/linear_programming/cuopt_c.h>
 
 #include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
-#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
-#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem_utils.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <cuopt/linear_programming/solver_settings.hpp>
diff --git a/cpp/src/pdlp/cuopt_c_internal.hpp b/cpp/src/pdlp/cuopt_c_internal.hpp
index f5e4c1bd75..f10b1d023e 100644
--- a/cpp/src/pdlp/cuopt_c_internal.hpp
+++ b/cpp/src/pdlp/cuopt_c_internal.hpp
@@ -9,9 +9,9 @@
 
 #include <cuopt/linear_programming/cuopt_c.h>
 #include <cuopt/linear_programming/backend_selection.hpp>
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/mip/solver_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/optimization_problem_solution_interface.hpp>
 #include <cuopt/linear_programming/pdlp/solver_solution.hpp>
 
@@ -32,14 +32,14 @@ struct problem_and_stream_view_t {
       std::unique_ptr<rmm::cuda_stream_view> sv(
         new rmm::cuda_stream_view(rmm::cuda_stream_per_thread));
       std::unique_ptr<raft::handle_t> h(new raft::handle_t(*sv));
-      std::unique_ptr<gpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>> gp(
-        new gpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>(h.get()));
+      std::unique_ptr<optimization_problem_t<cuopt_int_t, cuopt_float_t>> gp(
+        new optimization_problem_t<cuopt_int_t, cuopt_float_t>(h.get()));
       stream_view_ptr = sv.release();
       handle_ptr      = h.release();
       gpu_problem     = gp.release();
       cpu_problem     = nullptr;
     } else {
-      cpu_problem = new cpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>(nullptr);
+      cpu_problem = new cpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>();
       gpu_problem = nullptr;
     }
   }
@@ -103,17 +103,17 @@ struct problem_and_stream_view_t {
                  cpu_problem);
   }
 
-  optimization_problem_t<cuopt_int_t, cuopt_float_t> to_optimization_problem()
+  optimization_problem_t<cuopt_int_t, cuopt_float_t>* get_gpu_problem()
   {
     if (memory_backend == memory_backend_t::GPU) {
-      return gpu_problem->to_optimization_problem();
+      return gpu_problem;  // hand out the stored pointer; the problem is not copied
     } else {
-      return cpu_problem->to_optimization_problem();
+      return nullptr;  // non-GPU backend: callers get no GPU problem
     }
   }
 
   memory_backend_t memory_backend;
-  gpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>* gpu_problem;
+  optimization_problem_t<cuopt_int_t, cuopt_float_t>* gpu_problem;
   cpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>* cpu_problem;
   rmm::cuda_stream_view*
     stream_view_ptr;           // nullptr for CPU memory backend to avoid CUDA initialization
diff --git a/cpp/src/pdlp/gpu_optimization_problem.cu b/cpp/src/pdlp/gpu_optimization_problem.cu
deleted file mode 100644
index 4b8cb6c293..0000000000
--- a/cpp/src/pdlp/gpu_optimization_problem.cu
+++ /dev/null
@@ -1,1075 +0,0 @@
-/* clang-format off */
-/*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- */
-/* clang-format on */
-
-#include <cuopt/error.hpp>
-#include <cuopt/linear_programming/csr_matrix_utils.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
-#include <cuopt/linear_programming/solve_remote.hpp>
-
-#include <mip_heuristics/mip_constants.hpp>
-#include <mps_parser/writer.hpp>
-#include <utilities/logger.hpp>
-
-#include <raft/core/copy.hpp>
-#include <raft/core/cuda_support.hpp>
-#include <raft/core/device_mdspan.hpp>
-#include <raft/core/operators.hpp>
-#include <raft/util/cudart_utils.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/copy.h>
-#include <thrust/count.h>
-#include <thrust/execution_policy.h>
-
-#include <cmath>
-#include <stdexcept>
-#include <unordered_map>
-
-namespace cuopt::linear_programming {
-
-template <typename i_t, typename f_t>
-gpu_optimization_problem_t<i_t, f_t>::gpu_optimization_problem_t(raft::handle_t const* handle_ptr)
-  : handle_ptr_(handle_ptr),
-    stream_view_(handle_ptr->get_stream()),
-    A_(0, stream_view_),
-    A_indices_(0, stream_view_),
-    A_offsets_(0, stream_view_),
-    b_(0, stream_view_),
-    c_(0, stream_view_),
-    variable_lower_bounds_(0, stream_view_),
-    variable_upper_bounds_(0, stream_view_),
-    constraint_lower_bounds_(0, stream_view_),
-    constraint_upper_bounds_(0, stream_view_),
-    row_types_(0, stream_view_),
-    variable_types_(0, stream_view_)
-{
-  CUOPT_LOG_DEBUG("gpu_optimization_problem_t constructor: Using GPU backend");
-}
-
-// ==============================================================================
-// Setters
-// ==============================================================================
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_maximize(bool maximize)
-{
-  maximize_ = maximize;
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_csr_constraint_matrix(const f_t* A_values,
-                                                                     i_t size_values,
-                                                                     const i_t* A_indices,
-                                                                     i_t size_indices,
-                                                                     const i_t* A_offsets,
-                                                                     i_t size_offsets)
-{
-  cuopt_expects(size_offsets > 0,
-                error_type_t::ValidationError,
-                "CSR offsets array must have at least one element");
-  n_constraints_ = size_offsets - 1;
-
-  A_.resize(size_values, stream_view_);
-  A_indices_.resize(size_indices, stream_view_);
-  A_offsets_.resize(size_offsets, stream_view_);
-
-  raft::copy(A_.data(), A_values, size_values, stream_view_);
-  raft::copy(A_indices_.data(), A_indices, size_indices, stream_view_);
-  raft::copy(A_offsets_.data(), A_offsets, size_offsets, stream_view_);
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_constraint_bounds(const f_t* b, i_t size)
-{
-  b_.resize(size, stream_view_);
-  raft::copy(b_.data(), b, size, stream_view_);
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_objective_coefficients(const f_t* c, i_t size)
-{
-  n_vars_ = size;
-  c_.resize(size, stream_view_);
-  raft::copy(c_.data(), c, size, stream_view_);
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_objective_scaling_factor(
-  f_t objective_scaling_factor)
-{
-  objective_scaling_factor_ = objective_scaling_factor;
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_objective_offset(f_t objective_offset)
-{
-  objective_offset_ = objective_offset;
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_quadratic_objective_matrix(
-  const f_t* Q_values,
-  i_t size_values,
-  const i_t* Q_indices,
-  i_t size_indices,
-  const i_t* Q_offsets,
-  i_t size_offsets,
-  bool validate_positive_semi_definite)
-{
-  // TODO: If validate_positive_semi_definite is true, verify the matrix is PSD
-  // (e.g. attempt a Cholesky factorization and reject if it fails).
-
-  Q_values_.resize(size_values);
-  Q_indices_.resize(size_indices);
-  Q_offsets_.resize(size_offsets);
-
-  std::copy(Q_values, Q_values + size_values, Q_values_.begin());
-  std::copy(Q_indices, Q_indices + size_indices, Q_indices_.begin());
-  std::copy(Q_offsets, Q_offsets + size_offsets, Q_offsets_.begin());
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_variable_lower_bounds(
-  const f_t* variable_lower_bounds, i_t size)
-{
-  variable_lower_bounds_.resize(size, stream_view_);
-  raft::copy(variable_lower_bounds_.data(), variable_lower_bounds, size, stream_view_);
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_variable_upper_bounds(
-  const f_t* variable_upper_bounds, i_t size)
-{
-  variable_upper_bounds_.resize(size, stream_view_);
-  raft::copy(variable_upper_bounds_.data(), variable_upper_bounds, size, stream_view_);
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_variable_types(const var_t* variable_types, i_t size)
-{
-  variable_types_.resize(size, stream_view_);
-  raft::copy(variable_types_.data(), variable_types, size, stream_view_);
-
-  // Auto-detect problem category based on variable types (matching original optimization_problem_t)
-  i_t n_integer = thrust::count_if(handle_ptr_->get_thrust_policy(),
-                                   variable_types_.begin(),
-                                   variable_types_.end(),
-                                   [] __device__(auto val) { return val == var_t::INTEGER; });
-  // By default it is LP
-  if (n_integer == size) {
-    problem_category_ = problem_category_t::IP;
-  } else if (n_integer > 0) {
-    problem_category_ = problem_category_t::MIP;
-  }
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_problem_category(const problem_category_t& category)
-{
-  problem_category_ = category;
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_constraint_lower_bounds(
-  const f_t* constraint_lower_bounds, i_t size)
-{
-  constraint_lower_bounds_.resize(size, stream_view_);
-  raft::copy(constraint_lower_bounds_.data(), constraint_lower_bounds, size, stream_view_);
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_constraint_upper_bounds(
-  const f_t* constraint_upper_bounds, i_t size)
-{
-  constraint_upper_bounds_.resize(size, stream_view_);
-  raft::copy(constraint_upper_bounds_.data(), constraint_upper_bounds, size, stream_view_);
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_row_types(const char* row_types, i_t size)
-{
-  row_types_.resize(size, stream_view_);
-  raft::copy(row_types_.data(), row_types, size, stream_view_);
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_objective_name(const std::string& objective_name)
-{
-  objective_name_ = objective_name;
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_problem_name(const std::string& problem_name)
-{
-  problem_name_ = problem_name;
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_variable_names(
-  const std::vector<std::string>& variable_names)
-{
-  var_names_ = variable_names;
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::set_row_names(const std::vector<std::string>& row_names)
-{
-  row_names_ = row_names;
-}
-
-// ==============================================================================
-// Device Getters
-// ==============================================================================
-
-template <typename i_t, typename f_t>
-i_t gpu_optimization_problem_t<i_t, f_t>::get_n_variables() const
-{
-  return n_vars_;
-}
-
-template <typename i_t, typename f_t>
-i_t gpu_optimization_problem_t<i_t, f_t>::get_n_constraints() const
-{
-  return n_constraints_;
-}
-
-template <typename i_t, typename f_t>
-i_t gpu_optimization_problem_t<i_t, f_t>::get_nnz() const
-{
-  return A_.size();
-}
-
-template <typename i_t, typename f_t>
-i_t gpu_optimization_problem_t<i_t, f_t>::get_n_integers() const
-{
-  if (variable_types_.size() == 0) return 0;
-
-  return thrust::count(
-    rmm::exec_policy(stream_view_), variable_types_.begin(), variable_types_.end(), var_t::INTEGER);
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_values()
-  const
-{
-  return A_;
-}
-
-template <typename i_t, typename f_t>
-rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_values()
-{
-  return A_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<i_t>&
-gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices() const
-{
-  return A_indices_;
-}
-
-template <typename i_t, typename f_t>
-rmm::device_uvector<i_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices()
-{
-  return A_indices_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<i_t>&
-gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets() const
-{
-  return A_offsets_;
-}
-
-template <typename i_t, typename f_t>
-rmm::device_uvector<i_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets()
-{
-  return A_offsets_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_bounds() const
-{
-  return b_;
-}
-
-template <typename i_t, typename f_t>
-rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_bounds()
-{
-  return b_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_objective_coefficients()
-  const
-{
-  return c_;
-}
-
-template <typename i_t, typename f_t>
-rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_objective_coefficients()
-{
-  return c_;
-}
-
-template <typename i_t, typename f_t>
-f_t gpu_optimization_problem_t<i_t, f_t>::get_objective_scaling_factor() const
-{
-  return objective_scaling_factor_;
-}
-
-template <typename i_t, typename f_t>
-f_t gpu_optimization_problem_t<i_t, f_t>::get_objective_offset() const
-{
-  return objective_offset_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_lower_bounds()
-  const
-{
-  return variable_lower_bounds_;
-}
-
-template <typename i_t, typename f_t>
-rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_lower_bounds()
-{
-  return variable_lower_bounds_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_upper_bounds()
-  const
-{
-  return variable_upper_bounds_;
-}
-
-template <typename i_t, typename f_t>
-rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_upper_bounds()
-{
-  return variable_upper_bounds_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds()
-  const
-{
-  return constraint_lower_bounds_;
-}
-
-template <typename i_t, typename f_t>
-rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds()
-{
-  return constraint_lower_bounds_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds()
-  const
-{
-  return constraint_upper_bounds_;
-}
-
-template <typename i_t, typename f_t>
-rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds()
-{
-  return constraint_upper_bounds_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<char>& gpu_optimization_problem_t<i_t, f_t>::get_row_types() const
-{
-  return row_types_;
-}
-
-template <typename i_t, typename f_t>
-const rmm::device_uvector<var_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_types() const
-{
-  return variable_types_;
-}
-
-template <typename i_t, typename f_t>
-bool gpu_optimization_problem_t<i_t, f_t>::get_sense() const
-{
-  return maximize_;
-}
-
-template <typename i_t, typename f_t>
-bool gpu_optimization_problem_t<i_t, f_t>::empty() const
-{
-  return n_vars_ == 0 || n_constraints_ == 0;
-}
-
-template <typename i_t, typename f_t>
-std::string gpu_optimization_problem_t<i_t, f_t>::get_objective_name() const
-{
-  return objective_name_;
-}
-
-template <typename i_t, typename f_t>
-std::string gpu_optimization_problem_t<i_t, f_t>::get_problem_name() const
-{
-  return problem_name_;
-}
-
-template <typename i_t, typename f_t>
-problem_category_t gpu_optimization_problem_t<i_t, f_t>::get_problem_category() const
-{
-  return problem_category_;
-}
-
-template <typename i_t, typename f_t>
-const std::vector<std::string>& gpu_optimization_problem_t<i_t, f_t>::get_variable_names() const
-{
-  return var_names_;
-}
-
-template <typename i_t, typename f_t>
-const std::vector<std::string>& gpu_optimization_problem_t<i_t, f_t>::get_row_names() const
-{
-  return row_names_;
-}
-
-template <typename i_t, typename f_t>
-const std::vector<i_t>& gpu_optimization_problem_t<i_t, f_t>::get_quadratic_objective_offsets()
-  const
-{
-  return Q_offsets_;
-}
-
-template <typename i_t, typename f_t>
-const std::vector<i_t>& gpu_optimization_problem_t<i_t, f_t>::get_quadratic_objective_indices()
-  const
-{
-  return Q_indices_;
-}
-
-template <typename i_t, typename f_t>
-const std::vector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_quadratic_objective_values() const
-{
-  return Q_values_;
-}
-
-template <typename i_t, typename f_t>
-bool gpu_optimization_problem_t<i_t, f_t>::has_quadratic_objective() const
-{
-  return !Q_values_.empty();
-}
-
-template <typename i_t, typename f_t>
-raft::handle_t const* gpu_optimization_problem_t<i_t, f_t>::get_handle_ptr() const noexcept
-{
-  return handle_ptr_;
-}
-
-// ==============================================================================
-// Host Getters (copy from GPU to CPU)
-// ==============================================================================
-
-template <typename i_t, typename f_t>
-std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_values_host() const
-{
-  std::vector<f_t> host_data(A_.size());
-  raft::copy(host_data.data(), A_.data(), A_.size(), stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<i_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices_host() const
-{
-  std::vector<i_t> host_data(A_indices_.size());
-  raft::copy(host_data.data(), A_indices_.data(), A_indices_.size(), stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<i_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets_host() const
-{
-  std::vector<i_t> host_data(A_offsets_.size());
-  raft::copy(host_data.data(), A_offsets_.data(), A_offsets_.size(), stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_bounds_host() const
-{
-  std::vector<f_t> host_data(b_.size());
-  raft::copy(host_data.data(), b_.data(), b_.size(), stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_objective_coefficients_host() const
-{
-  std::vector<f_t> host_data(c_.size());
-  raft::copy(host_data.data(), c_.data(), c_.size(), stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_variable_lower_bounds_host() const
-{
-  std::vector<f_t> host_data(variable_lower_bounds_.size());
-  raft::copy(
-    host_data.data(), variable_lower_bounds_.data(), variable_lower_bounds_.size(), stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_variable_upper_bounds_host() const
-{
-  std::vector<f_t> host_data(variable_upper_bounds_.size());
-  raft::copy(
-    host_data.data(), variable_upper_bounds_.data(), variable_upper_bounds_.size(), stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds_host() const
-{
-  std::vector<f_t> host_data(constraint_lower_bounds_.size());
-  raft::copy(host_data.data(),
-             constraint_lower_bounds_.data(),
-             constraint_lower_bounds_.size(),
-             stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds_host() const
-{
-  std::vector<f_t> host_data(constraint_upper_bounds_.size());
-  raft::copy(host_data.data(),
-             constraint_upper_bounds_.data(),
-             constraint_upper_bounds_.size(),
-             stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<char> gpu_optimization_problem_t<i_t, f_t>::get_row_types_host() const
-{
-  std::vector<char> host_data(row_types_.size());
-  raft::copy(host_data.data(), row_types_.data(), row_types_.size(), stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-template <typename i_t, typename f_t>
-std::vector<var_t> gpu_optimization_problem_t<i_t, f_t>::get_variable_types_host() const
-{
-  std::vector<var_t> host_data(variable_types_.size());
-  raft::copy(host_data.data(), variable_types_.data(), variable_types_.size(), stream_view_);
-  handle_ptr_->sync_stream();
-  return host_data;
-}
-
-// ==============================================================================
-// Conversion to optimization_problem_t
-// ==============================================================================
-
-template <typename i_t, typename f_t>
-optimization_problem_t<i_t, f_t> gpu_optimization_problem_t<i_t, f_t>::to_optimization_problem()
-{
-  // NOTE: This method copies device vectors rather than moving them so that the
-  // gpu_optimization_problem_t remains valid and the problem can be solved multiple
-  // times. The copies are device-to-device at full GPU memory bandwidth and are
-  // negligible compared to solve time. A subsequent refactor will make
-  // optimization_problem_t inherit from the interface directly, eliminating the
-  // need for this conversion entirely.
-
-  optimization_problem_t<i_t, f_t> problem(handle_ptr_);
-
-  // Set scalar values
-  problem.set_maximize(maximize_);
-  problem.set_objective_scaling_factor(objective_scaling_factor_);
-  problem.set_objective_offset(objective_offset_);
-  problem.set_problem_category(problem_category_);
-
-  // Set string values
-  if (!objective_name_.empty()) problem.set_objective_name(objective_name_);
-  if (!problem_name_.empty()) problem.set_problem_name(problem_name_);
-  if (!var_names_.empty()) problem.set_variable_names(var_names_);
-  if (!row_names_.empty()) problem.set_row_names(row_names_);
-
-  // Copy all device vectors (raft::copy handles device-to-device automatically)
-
-  // Copy CSR constraint matrix
-  if (A_.size() > 0) {
-    problem.set_csr_constraint_matrix(A_.data(),
-                                      A_.size(),
-                                      A_indices_.data(),
-                                      A_indices_.size(),
-                                      A_offsets_.data(),
-                                      A_offsets_.size());
-  }
-
-  // Copy constraint bounds
-  if (b_.size() > 0) { problem.set_constraint_bounds(b_.data(), b_.size()); }
-
-  // Copy objective coefficients
-  if (c_.size() > 0) { problem.set_objective_coefficients(c_.data(), c_.size()); }
-
-  // Set quadratic objective if present (stored in std::vector, not device_uvector)
-  if (!Q_values_.empty()) {
-    problem.set_quadratic_objective_matrix(Q_values_.data(),
-                                           Q_values_.size(),
-                                           Q_indices_.data(),
-                                           Q_indices_.size(),
-                                           Q_offsets_.data(),
-                                           Q_offsets_.size());
-  }
-
-  // Copy variable bounds
-  if (variable_lower_bounds_.size() > 0) {
-    problem.set_variable_lower_bounds(variable_lower_bounds_.data(), variable_lower_bounds_.size());
-  }
-  if (variable_upper_bounds_.size() > 0) {
-    problem.set_variable_upper_bounds(variable_upper_bounds_.data(), variable_upper_bounds_.size());
-  }
-
-  // Copy variable types
-  if (variable_types_.size() > 0) {
-    problem.set_variable_types(variable_types_.data(), variable_types_.size());
-  }
-
-  // Copy constraint lower/upper bounds
-  if (constraint_lower_bounds_.size() > 0) {
-    problem.set_constraint_lower_bounds(constraint_lower_bounds_.data(),
-                                        constraint_lower_bounds_.size());
-  }
-  if (constraint_upper_bounds_.size() > 0) {
-    problem.set_constraint_upper_bounds(constraint_upper_bounds_.data(),
-                                        constraint_upper_bounds_.size());
-  }
-
-  // Copy row types
-  if (row_types_.size() > 0) { problem.set_row_types(row_types_.data(), row_types_.size()); }
-
-  return problem;
-}
-
-// ==============================================================================
-// File I/O
-// ==============================================================================
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_path)
-{
-  // Use the existing host getters to get data, then write to MPS
-  cuopt::mps_parser::data_model_view_t<i_t, f_t> data_model_view;
-
-  // Set optimization sense
-  data_model_view.set_maximize(get_sense());
-
-  // Copy to host using host getters
-  auto constraint_matrix_values  = get_constraint_matrix_values_host();
-  auto constraint_matrix_indices = get_constraint_matrix_indices_host();
-  auto constraint_matrix_offsets = get_constraint_matrix_offsets_host();
-  auto constraint_bounds         = get_constraint_bounds_host();
-  auto objective_coefficients    = get_objective_coefficients_host();
-  auto variable_lower_bounds     = get_variable_lower_bounds_host();
-  auto variable_upper_bounds     = get_variable_upper_bounds_host();
-  auto constraint_lower_bounds   = get_constraint_lower_bounds_host();
-  auto constraint_upper_bounds   = get_constraint_upper_bounds_host();
-  auto row_types                 = get_row_types_host();
-
-  // Set constraint matrix in CSR format
-  if (!constraint_matrix_values.empty()) {
-    data_model_view.set_csr_constraint_matrix(constraint_matrix_values.data(),
-                                              constraint_matrix_values.size(),
-                                              constraint_matrix_indices.data(),
-                                              constraint_matrix_indices.size(),
-                                              constraint_matrix_offsets.data(),
-                                              constraint_matrix_offsets.size());
-  }
-
-  // Set constraint bounds (RHS)
-  if (!constraint_bounds.empty()) {
-    data_model_view.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size());
-  }
-
-  // Set objective coefficients
-  if (!objective_coefficients.empty()) {
-    data_model_view.set_objective_coefficients(objective_coefficients.data(),
-                                               objective_coefficients.size());
-  }
-
-  // Set objective scaling and offset
-  data_model_view.set_objective_scaling_factor(objective_scaling_factor_);
-  data_model_view.set_objective_offset(objective_offset_);
-
-  // Set variable bounds
-  if (!variable_lower_bounds.empty()) {
-    data_model_view.set_variable_lower_bounds(variable_lower_bounds.data(),
-                                              variable_lower_bounds.size());
-  }
-  if (!variable_upper_bounds.empty()) {
-    data_model_view.set_variable_upper_bounds(variable_upper_bounds.data(),
-                                              variable_upper_bounds.size());
-  }
-
-  // Set row types (constraint types)
-  if (!row_types.empty()) { data_model_view.set_row_types(row_types.data(), row_types.size()); }
-
-  // Set constraint bounds (independently, a problem may have only one side)
-  if (!constraint_lower_bounds.empty()) {
-    data_model_view.set_constraint_lower_bounds(constraint_lower_bounds.data(),
-                                                constraint_lower_bounds.size());
-  }
-  if (!constraint_upper_bounds.empty()) {
-    data_model_view.set_constraint_upper_bounds(constraint_upper_bounds.data(),
-                                                constraint_upper_bounds.size());
-  }
-
-  // Set problem and variable names FIRST (before variable types)
-  if (!problem_name_.empty()) { data_model_view.set_problem_name(problem_name_); }
-  if (!objective_name_.empty()) { data_model_view.set_objective_name(objective_name_); }
-  if (!var_names_.empty()) { data_model_view.set_variable_names(var_names_); }
-  if (!row_names_.empty()) { data_model_view.set_row_names(row_names_); }
-
-  // Set variable types AFTER names (convert from enum to char)
-  // CRITICAL: Declare variable_types OUTSIDE the if block so it stays alive
-  // until after write_mps() is called, since data_model_view stores a span (pointer) to it
-  std::vector<char> variable_types;
-  if (n_vars_ > 0) {
-    auto enum_variable_types = get_variable_types_host();
-    variable_types.resize(enum_variable_types.size());
-
-    for (size_t i = 0; i < variable_types.size(); ++i) {
-      variable_types[i] = (enum_variable_types[i] == var_t::INTEGER) ? 'I' : 'C';
-    }
-
-    data_model_view.set_variable_types(variable_types.data(), variable_types.size());
-  }
-
-  cuopt::mps_parser::write_mps(data_model_view, mps_file_path);
-}
-
-// ==============================================================================
-// Comparison
-// ==============================================================================
-
-template <typename i_t, typename f_t>
-bool gpu_optimization_problem_t<i_t, f_t>::is_equivalent(
-  const optimization_problem_interface_t<i_t, f_t>& other) const
-{
-  // Compare scalar properties
-  if (maximize_ != other.get_sense()) return false;
-  if (n_vars_ != other.get_n_variables()) return false;
-  if (n_constraints_ != other.get_n_constraints()) return false;
-  if (std::abs(objective_scaling_factor_ - other.get_objective_scaling_factor()) > 1e-9)
-    return false;
-  if (std::abs(objective_offset_ - other.get_objective_offset()) > 1e-9) return false;
-  if (problem_category_ != other.get_problem_category()) return false;
-
-  // Get host data from both problems
-  auto this_c  = get_objective_coefficients_host();
-  auto other_c = other.get_objective_coefficients_host();
-  if (this_c.size() != other_c.size()) return false;
-
-  auto this_var_lb  = get_variable_lower_bounds_host();
-  auto other_var_lb = other.get_variable_lower_bounds_host();
-  if (this_var_lb.size() != other_var_lb.size()) return false;
-
-  auto this_var_ub  = get_variable_upper_bounds_host();
-  auto other_var_ub = other.get_variable_upper_bounds_host();
-  if (this_var_ub.size() != other_var_ub.size()) return false;
-
-  auto this_var_types  = get_variable_types_host();
-  auto other_var_types = other.get_variable_types_host();
-  if (this_var_types.size() != other_var_types.size()) return false;
-
-  auto this_b  = get_constraint_bounds_host();
-  auto other_b = other.get_constraint_bounds_host();
-  if (this_b.size() != other_b.size()) return false;
-
-  auto this_A_values  = get_constraint_matrix_values_host();
-  auto other_A_values = other.get_constraint_matrix_values_host();
-  if (this_A_values.size() != other_A_values.size()) return false;
-
-  // Check if we have variable and row names for permutation matching
-  const auto& this_var_names  = get_variable_names();
-  const auto& other_var_names = other.get_variable_names();
-  const auto& this_row_names  = get_row_names();
-  const auto& other_row_names = other.get_row_names();
-
-  bool has_names = !this_var_names.empty() && !other_var_names.empty() && !this_row_names.empty() &&
-                   !other_row_names.empty();
-
-  // If no names, fall back to direct-order comparison
-  if (!has_names) {
-    for (i_t i = 0; i < n_vars_; ++i) {
-      if (std::abs(this_c[i] - other_c[i]) > 1e-9) return false;
-      if (std::abs(this_var_lb[i] - other_var_lb[i]) > 1e-9) return false;
-      if (std::abs(this_var_ub[i] - other_var_ub[i]) > 1e-9) return false;
-      if (this_var_types[i] != other_var_types[i]) return false;
-    }
-    for (i_t i = 0; i < n_constraints_; ++i) {
-      if (std::abs(this_b[i] - other_b[i]) > 1e-9) return false;
-    }
-    // Direct CSR comparison without permutation
-    auto this_A_indices  = get_constraint_matrix_indices_host();
-    auto other_A_indices = other.get_constraint_matrix_indices_host();
-    auto this_A_offsets  = get_constraint_matrix_offsets_host();
-    auto other_A_offsets = other.get_constraint_matrix_offsets_host();
-    if (this_A_indices.size() != other_A_indices.size()) return false;
-    if (this_A_offsets.size() != other_A_offsets.size()) return false;
-    for (size_t i = 0; i < this_A_values.size(); ++i) {
-      if (std::abs(this_A_values[i] - other_A_values[i]) > 1e-9) return false;
-    }
-    for (size_t i = 0; i < this_A_indices.size(); ++i) {
-      if (this_A_indices[i] != other_A_indices[i]) return false;
-    }
-    for (size_t i = 0; i < this_A_offsets.size(); ++i) {
-      if (this_A_offsets[i] != other_A_offsets[i]) return false;
-    }
-    return true;
-  }
-
-  // Build variable permutation map
-  std::unordered_map<std::string, i_t> other_var_idx;
-  for (size_t j = 0; j < other_var_names.size(); ++j) {
-    other_var_idx[other_var_names[j]] = static_cast<i_t>(j);
-  }
-
-  std::vector<i_t> var_perm(n_vars_);
-  for (i_t i = 0; i < n_vars_; ++i) {
-    auto it = other_var_idx.find(this_var_names[i]);
-    if (it == other_var_idx.end()) return false;
-    var_perm[i] = it->second;
-  }
-
-  // Build row permutation map
-  std::unordered_map<std::string, i_t> other_row_idx;
-  for (size_t j = 0; j < other_row_names.size(); ++j) {
-    other_row_idx[other_row_names[j]] = static_cast<i_t>(j);
-  }
-
-  std::vector<i_t> row_perm(n_constraints_);
-  for (i_t i = 0; i < n_constraints_; ++i) {
-    auto it = other_row_idx.find(this_row_names[i]);
-    if (it == other_row_idx.end()) return false;
-    row_perm[i] = it->second;
-  }
-
-  // Compare variable-indexed arrays with permutation
-  for (i_t i = 0; i < n_vars_; ++i) {
-    i_t j = var_perm[i];
-    if (std::abs(this_c[i] - other_c[j]) > 1e-9) return false;
-    if (std::abs(this_var_lb[i] - other_var_lb[j]) > 1e-9) return false;
-    if (std::abs(this_var_ub[i] - other_var_ub[j]) > 1e-9) return false;
-    if (this_var_types[i] != other_var_types[j]) return false;
-  }
-
-  // Compare constraint-indexed arrays with permutation
-  for (i_t i = 0; i < n_constraints_; ++i) {
-    i_t j = row_perm[i];
-    if (std::abs(this_b[i] - other_b[j]) > 1e-9) return false;
-  }
-
-  // Compare constraint lower/upper bounds with permutation
-  auto this_clb  = get_constraint_lower_bounds_host();
-  auto other_clb = other.get_constraint_lower_bounds_host();
-  if (this_clb.size() != other_clb.size()) return false;
-  for (i_t i = 0; i < n_constraints_ && i < static_cast<i_t>(this_clb.size()); ++i) {
-    i_t j = row_perm[i];
-    if (std::abs(this_clb[i] - other_clb[j]) > 1e-9) return false;
-  }
-
-  auto this_cub  = get_constraint_upper_bounds_host();
-  auto other_cub = other.get_constraint_upper_bounds_host();
-  if (this_cub.size() != other_cub.size()) return false;
-  for (i_t i = 0; i < n_constraints_ && i < static_cast<i_t>(this_cub.size()); ++i) {
-    i_t j = row_perm[i];
-    if (std::abs(this_cub[i] - other_cub[j]) > 1e-9) return false;
-  }
-
-  // Compare row types with permutation
-  auto this_rt  = get_row_types_host();
-  auto other_rt = other.get_row_types_host();
-  if (this_rt.size() != other_rt.size()) return false;
-  for (i_t i = 0; i < n_constraints_ && i < static_cast<i_t>(this_rt.size()); ++i) {
-    i_t j = row_perm[i];
-    if (this_rt[i] != other_rt[j]) return false;
-  }
-
-  // Compare CSR constraint matrix with row/column permutations
-  auto this_A_indices  = get_constraint_matrix_indices_host();
-  auto other_A_indices = other.get_constraint_matrix_indices_host();
-  auto this_A_offsets  = get_constraint_matrix_offsets_host();
-  auto other_A_offsets = other.get_constraint_matrix_offsets_host();
-
-  if (!csr_matrices_equivalent_with_permutation_host(this_A_offsets,
-                                                     this_A_indices,
-                                                     this_A_values,
-                                                     other_A_offsets,
-                                                     other_A_indices,
-                                                     other_A_values,
-                                                     row_perm,
-                                                     var_perm)) {
-    return false;
-  }
-
-  return true;
-}
-
-// ==============================================================================
-// Remote Execution (Polymorphic Dispatch)
-// ==============================================================================
-
-template <typename i_t, typename f_t>
-std::unique_ptr<lp_solution_interface_t<i_t, f_t>>
-gpu_optimization_problem_t<i_t, f_t>::solve_lp_remote(
-  pdlp_solver_settings_t<i_t, f_t> const& settings,
-  bool problem_checking,
-  bool use_pdlp_solver_mode) const
-{
-  return ::cuopt::linear_programming::solve_lp_remote(
-    *this, settings, problem_checking, use_pdlp_solver_mode);
-}
-
-template <typename i_t, typename f_t>
-std::unique_ptr<mip_solution_interface_t<i_t, f_t>>
-gpu_optimization_problem_t<i_t, f_t>::solve_mip_remote(
-  mip_solver_settings_t<i_t, f_t> const& settings) const
-{
-  return ::cuopt::linear_programming::solve_mip_remote(*this, settings);
-}
-
-// ==============================================================================
-// C API Support: Copy to Host (GPU Implementation)
-// ==============================================================================
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::copy_objective_coefficients_to_host(f_t* output,
-                                                                               i_t size) const
-{
-  cuopt_expects(output != nullptr && size <= static_cast<i_t>(c_.size()),
-                error_type_t::ValidationError,
-                "copy_objective_coefficients_to_host: null output or size exceeds buffer");
-  RAFT_CUDA_TRY(cudaMemcpy(output, c_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::copy_constraint_matrix_to_host(
-  f_t* values, i_t* indices, i_t* offsets, i_t num_values, i_t num_indices, i_t num_offsets) const
-{
-  cuopt_expects(values != nullptr && num_values <= static_cast<i_t>(A_.size()),
-                error_type_t::ValidationError,
-                "copy_constraint_matrix_to_host: null values or size exceeds buffer");
-  cuopt_expects(indices != nullptr && num_indices <= static_cast<i_t>(A_indices_.size()),
-                error_type_t::ValidationError,
-                "copy_constraint_matrix_to_host: null indices or size exceeds buffer");
-  cuopt_expects(offsets != nullptr && num_offsets <= static_cast<i_t>(A_offsets_.size()),
-                error_type_t::ValidationError,
-                "copy_constraint_matrix_to_host: null offsets or size exceeds buffer");
-  RAFT_CUDA_TRY(cudaMemcpy(values, A_.data(), num_values * sizeof(f_t), cudaMemcpyDeviceToHost));
-  RAFT_CUDA_TRY(
-    cudaMemcpy(indices, A_indices_.data(), num_indices * sizeof(i_t), cudaMemcpyDeviceToHost));
-  RAFT_CUDA_TRY(
-    cudaMemcpy(offsets, A_offsets_.data(), num_offsets * sizeof(i_t), cudaMemcpyDeviceToHost));
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::copy_row_types_to_host(char* output, i_t size) const
-{
-  cuopt_expects(output != nullptr && size <= static_cast<i_t>(row_types_.size()),
-                error_type_t::ValidationError,
-                "copy_row_types_to_host: null output or size exceeds buffer");
-  RAFT_CUDA_TRY(cudaMemcpy(output, row_types_.data(), size * sizeof(char), cudaMemcpyDeviceToHost));
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::copy_constraint_bounds_to_host(f_t* output,
-                                                                          i_t size) const
-{
-  cuopt_expects(output != nullptr && size <= static_cast<i_t>(b_.size()),
-                error_type_t::ValidationError,
-                "copy_constraint_bounds_to_host: null output or size exceeds buffer");
-  RAFT_CUDA_TRY(cudaMemcpy(output, b_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::copy_constraint_lower_bounds_to_host(f_t* output,
-                                                                                i_t size) const
-{
-  cuopt_expects(output != nullptr && size <= static_cast<i_t>(constraint_lower_bounds_.size()),
-                error_type_t::ValidationError,
-                "copy_constraint_lower_bounds_to_host: null output or size exceeds buffer");
-  RAFT_CUDA_TRY(cudaMemcpy(
-    output, constraint_lower_bounds_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::copy_constraint_upper_bounds_to_host(f_t* output,
-                                                                                i_t size) const
-{
-  cuopt_expects(output != nullptr && size <= static_cast<i_t>(constraint_upper_bounds_.size()),
-                error_type_t::ValidationError,
-                "copy_constraint_upper_bounds_to_host: null output or size exceeds buffer");
-  RAFT_CUDA_TRY(cudaMemcpy(
-    output, constraint_upper_bounds_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::copy_variable_lower_bounds_to_host(f_t* output,
-                                                                              i_t size) const
-{
-  cuopt_expects(output != nullptr && size <= static_cast<i_t>(variable_lower_bounds_.size()),
-                error_type_t::ValidationError,
-                "copy_variable_lower_bounds_to_host: null output or size exceeds buffer");
-  RAFT_CUDA_TRY(
-    cudaMemcpy(output, variable_lower_bounds_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::copy_variable_upper_bounds_to_host(f_t* output,
-                                                                              i_t size) const
-{
-  cuopt_expects(output != nullptr && size <= static_cast<i_t>(variable_upper_bounds_.size()),
-                error_type_t::ValidationError,
-                "copy_variable_upper_bounds_to_host: null output or size exceeds buffer");
-  RAFT_CUDA_TRY(
-    cudaMemcpy(output, variable_upper_bounds_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
-}
-
-template <typename i_t, typename f_t>
-void gpu_optimization_problem_t<i_t, f_t>::copy_variable_types_to_host(var_t* output,
-                                                                       i_t size) const
-{
-  cuopt_expects(output != nullptr && size <= static_cast<i_t>(variable_types_.size()),
-                error_type_t::ValidationError,
-                "copy_variable_types_to_host: null output or size exceeds buffer");
-  RAFT_CUDA_TRY(
-    cudaMemcpy(output, variable_types_.data(), size * sizeof(var_t), cudaMemcpyDeviceToHost));
-}
-
-// ==============================================================================
-// Template instantiations
-// ==============================================================================
-// Explicit template instantiations matching optimization_problem_t
-#if MIP_INSTANTIATE_FLOAT
-template class gpu_optimization_problem_t<int32_t, float>;
-#endif
-#if MIP_INSTANTIATE_DOUBLE
-template class gpu_optimization_problem_t<int32_t, double>;
-#endif
-
-}  // namespace cuopt::linear_programming
diff --git a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu
index 65ac8b4ad7..d0888dd3ac 100644
--- a/cpp/src/pdlp/optimization_problem.cu
+++ b/cpp/src/pdlp/optimization_problem.cu
@@ -5,23 +5,34 @@
  */
 /* clang-format on */
 
-#include <cuopt/error.hpp>
-#include <mps_parser/writer.hpp>
-#include <utilities/logger.hpp>
-
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/solve_remote.hpp>
+
+#include <cuopt/error.hpp>
+#include <cuopt/linear_programming/csr_matrix_utils.hpp>
 #include <mip_heuristics/mip_constants.hpp>
+#include <mps_parser/writer.hpp>
 #include <utilities/copy_helpers.hpp>
+#include <utilities/logger.hpp>
 
+#include <raft/core/copy.hpp>
+#include <raft/core/cuda_support.hpp>
+#include <raft/core/device_mdspan.hpp>
 #include <raft/core/nvtx.hpp>
+#include <raft/core/operators.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
 #include <thrust/binary_search.h>
+#include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/equal.h>
+#include <thrust/execution_policy.h>
 #include <thrust/gather.h>
 #include <thrust/iterator/permutation_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
@@ -31,6 +42,8 @@
 #include <cuda_profiler_api.h>
 
 #include <algorithm>
+#include <cmath>
+#include <stdexcept>
 #include <unordered_map>
 
 namespace cuopt::linear_programming {
@@ -38,25 +51,21 @@ namespace cuopt::linear_programming {
 template <typename i_t, typename f_t>
 optimization_problem_t<i_t, f_t>::optimization_problem_t(raft::handle_t const* handle_ptr)
   : handle_ptr_(handle_ptr),
-    stream_view_(handle_ptr_->get_stream()),
-    maximize_{false},
-    n_vars_{0},
-    n_constraints_{0},
-    A_{0, stream_view_},
-    A_indices_{0, stream_view_},
-    A_offsets_{0, stream_view_},
-    b_{0, stream_view_},
-    c_{0, stream_view_},
-    variable_lower_bounds_{0, stream_view_},
-    variable_upper_bounds_{0, stream_view_},
-    constraint_lower_bounds_{0, stream_view_},
-    constraint_upper_bounds_{0, stream_view_},
-    row_types_{0, stream_view_},
-    variable_types_{0, stream_view_},
-    var_names_{},
-    row_names_{}
+    stream_view_(handle_ptr != nullptr ? handle_ptr->get_stream() : rmm::cuda_stream_view{}),
+    A_(0, stream_view_),
+    A_indices_(0, stream_view_),
+    A_offsets_(0, stream_view_),
+    b_(0, stream_view_),
+    c_(0, stream_view_),
+    variable_lower_bounds_(0, stream_view_),
+    variable_upper_bounds_(0, stream_view_),
+    constraint_lower_bounds_(0, stream_view_),
+    constraint_upper_bounds_(0, stream_view_),
+    row_types_(0, stream_view_),
+    variable_types_(0, stream_view_)
 {
   raft::common::nvtx::range fun_scope("optimization problem construction");
+  CUOPT_LOG_INFO("optimization_problem_t constructor: Using GPU backend");
 }
 
 template <typename i_t, typename f_t>
@@ -74,6 +83,9 @@ optimization_problem_t<i_t, f_t>::optimization_problem_t(
     c_{other.get_objective_coefficients(), stream_view_},
     objective_scaling_factor_{other.get_objective_scaling_factor()},
     objective_offset_{other.get_objective_offset()},
+    Q_offsets_{other.get_quadratic_objective_offsets()},
+    Q_indices_{other.get_quadratic_objective_indices()},
+    Q_values_{other.get_quadratic_objective_values()},
     variable_lower_bounds_{other.get_variable_lower_bounds(), stream_view_},
     variable_upper_bounds_{other.get_variable_upper_bounds(), stream_view_},
     constraint_lower_bounds_{other.get_constraint_lower_bounds(), stream_view_},
@@ -88,255 +100,14 @@ optimization_problem_t<i_t, f_t>::optimization_problem_t(
 {
 }
 
-/**
- * @brief Compare two CSR matrices for equivalence under row and column permutations.
- *
- * @param this_offsets Row offsets of first matrix
- * @param this_indices Column indices of first matrix
- * @param this_values Values of first matrix
- * @param other_offsets Row offsets of second matrix
- * @param other_indices Column indices of second matrix
- * @param other_values Values of second matrix
- * @param d_row_perm_inv Inverse row permutation (maps other's row indices to this's)
- * @param d_col_perm_inv Inverse column permutation (maps other's col indices to this's)
- * @param n_cols Number of columns (used for sort key computation)
- * @param stream CUDA stream
- * @return true if matrices are equivalent under the given permutations
- */
-template <typename i_t, typename f_t>
-static bool csr_matrices_equivalent_with_permutation(const rmm::device_uvector<i_t>& this_offsets,
-                                                     const rmm::device_uvector<i_t>& this_indices,
-                                                     const rmm::device_uvector<f_t>& this_values,
-                                                     const rmm::device_uvector<i_t>& other_offsets,
-                                                     const rmm::device_uvector<i_t>& other_indices,
-                                                     const rmm::device_uvector<f_t>& other_values,
-                                                     const rmm::device_uvector<i_t>& d_row_perm_inv,
-                                                     const rmm::device_uvector<i_t>& d_col_perm_inv,
-                                                     i_t n_cols,
-                                                     rmm::cuda_stream_view stream)
-{
-  const i_t nnz = static_cast<i_t>(this_values.size());
-  if (nnz != static_cast<i_t>(other_values.size())) { return false; }
-  if (nnz == 0) { return true; }
-
-  auto policy = rmm::exec_policy(stream);
-
-  // Expand CSR row offsets to row indices for 'this'
-  rmm::device_uvector<i_t> this_rows(nnz, stream);
-  rmm::device_uvector<i_t> this_cols(nnz, stream);
-  rmm::device_uvector<f_t> this_vals(nnz, stream);
-
-  // upper_bound returns 1-based indices; convert to 0-based
-  thrust::upper_bound(policy,
-                      this_offsets.begin(),
-                      this_offsets.end(),
-                      thrust::make_counting_iterator<i_t>(0),
-                      thrust::make_counting_iterator<i_t>(nnz),
-                      this_rows.begin());
-  thrust::transform(
-    policy, this_rows.begin(), this_rows.end(), this_rows.begin(), [] __device__(i_t r) {
-      return r - 1;
-    });
-
-  thrust::copy(policy, this_indices.begin(), this_indices.end(), this_cols.begin());
-  thrust::copy(policy, this_values.begin(), this_values.end(), this_vals.begin());
-
-  // For 'other': expand and apply inverse permutations to map to 'this' coordinate system
-  rmm::device_uvector<i_t> other_rows(nnz, stream);
-  rmm::device_uvector<i_t> other_cols(nnz, stream);
-  rmm::device_uvector<f_t> other_vals(nnz, stream);
-
-  thrust::upper_bound(policy,
-                      other_offsets.begin(),
-                      other_offsets.end(),
-                      thrust::make_counting_iterator<i_t>(0),
-                      thrust::make_counting_iterator<i_t>(nnz),
-                      other_rows.begin());
-  thrust::transform(
-    policy, other_rows.begin(), other_rows.end(), other_rows.begin(), [] __device__(i_t r) {
-      return r - 1;
-    });
-
-  thrust::gather(
-    policy, other_rows.begin(), other_rows.end(), d_row_perm_inv.begin(), other_rows.begin());
-
-  thrust::gather(
-    policy, other_indices.begin(), other_indices.end(), d_col_perm_inv.begin(), other_cols.begin());
-
-  thrust::copy(policy, other_values.begin(), other_values.end(), other_vals.begin());
-
-  // Create sort keys: row * n_cols + col (to sort by row then column)
-  rmm::device_uvector<int64_t> this_keys(nnz, stream);
-  rmm::device_uvector<int64_t> other_keys(nnz, stream);
-
-  const int64_t n_cols_64 = n_cols;
-  thrust::transform(policy,
-                    thrust::make_zip_iterator(this_rows.begin(), this_cols.begin()),
-                    thrust::make_zip_iterator(this_rows.end(), this_cols.end()),
-                    this_keys.begin(),
-                    [n_cols_64] __device__(thrust::tuple<i_t, i_t> rc) {
-                      return static_cast<int64_t>(thrust::get<0>(rc)) * n_cols_64 +
-                             static_cast<int64_t>(thrust::get<1>(rc));
-                    });
-
-  thrust::transform(policy,
-                    thrust::make_zip_iterator(other_rows.begin(), other_cols.begin()),
-                    thrust::make_zip_iterator(other_rows.end(), other_cols.end()),
-                    other_keys.begin(),
-                    [n_cols_64] __device__(thrust::tuple<i_t, i_t> rc) {
-                      return static_cast<int64_t>(thrust::get<0>(rc)) * n_cols_64 +
-                             static_cast<int64_t>(thrust::get<1>(rc));
-                    });
-
-  thrust::sort_by_key(policy, this_keys.begin(), this_keys.end(), this_vals.begin());
-  thrust::sort_by_key(policy, other_keys.begin(), other_keys.end(), other_vals.begin());
-
-  if (!thrust::equal(policy, this_keys.begin(), this_keys.end(), other_keys.begin())) {
-    return false;
-  }
-
-  if (!thrust::equal(policy, this_vals.begin(), this_vals.end(), other_vals.begin())) {
-    return false;
-  }
-
-  return true;
-}
+// ==============================================================================
+// Setters
+// ==============================================================================
 
 template <typename i_t, typename f_t>
-bool optimization_problem_t<i_t, f_t>::is_equivalent(
-  const optimization_problem_t<i_t, f_t>& other) const
+void optimization_problem_t<i_t, f_t>::set_maximize(bool maximize)
 {
-  if (maximize_ != other.maximize_) { return false; }
-  if (n_vars_ != other.n_vars_) { return false; }
-  if (n_constraints_ != other.n_constraints_) { return false; }
-  if (objective_scaling_factor_ != other.objective_scaling_factor_) { return false; }
-  if (objective_offset_ != other.objective_offset_) { return false; }
-  if (problem_category_ != other.problem_category_) { return false; }
-  if (A_.size() != other.A_.size()) { return false; }
-
-  if (var_names_.empty() || other.var_names_.empty()) { return false; }
-  if (row_names_.empty() || other.row_names_.empty()) { return false; }
-
-  // Build variable permutation: var_perm[i] = index j in other where var_names_[i] ==
-  // other.var_names_[j]
-  std::unordered_map<std::string, i_t> other_var_idx;
-  for (size_t j = 0; j < other.var_names_.size(); ++j) {
-    other_var_idx[other.var_names_[j]] = static_cast<i_t>(j);
-  }
-  std::vector<i_t> var_perm(n_vars_);
-  for (i_t i = 0; i < n_vars_; ++i) {
-    auto it = other_var_idx.find(var_names_[i]);
-    if (it == other_var_idx.end()) { return false; }
-    var_perm[i] = it->second;
-  }
-
-  // Build row permutation: row_perm[i] = index j in other where row_names_[i] ==
-  // other.row_names_[j]
-  std::unordered_map<std::string, i_t> other_row_idx;
-  for (size_t j = 0; j < other.row_names_.size(); ++j) {
-    other_row_idx[other.row_names_[j]] = static_cast<i_t>(j);
-  }
-  std::vector<i_t> row_perm(n_constraints_);
-  for (i_t i = 0; i < n_constraints_; ++i) {
-    auto it = other_row_idx.find(row_names_[i]);
-    if (it == other_row_idx.end()) { return false; }
-    row_perm[i] = it->second;
-  }
-
-  // Upload permutations to GPU
-  rmm::device_uvector<i_t> d_var_perm(n_vars_, stream_view_);
-  rmm::device_uvector<i_t> d_row_perm(n_constraints_, stream_view_);
-  raft::copy(d_var_perm.data(), var_perm.data(), n_vars_, stream_view_);
-  raft::copy(d_row_perm.data(), row_perm.data(), n_constraints_, stream_view_);
-
-  auto policy = rmm::exec_policy(stream_view_);
-
-  auto permuted_eq = [&](auto this_begin, auto this_end, auto other_begin, auto perm_begin) {
-    auto other_perm = thrust::make_permutation_iterator(other_begin, perm_begin);
-    return thrust::equal(policy, this_begin, this_end, other_perm);
-  };
-
-  // Compare variable-indexed arrays
-  if (c_.size() != other.c_.size()) { return false; }
-  if (!permuted_eq(c_.begin(), c_.end(), other.c_.begin(), d_var_perm.begin())) { return false; }
-  if (variable_lower_bounds_.size() != other.variable_lower_bounds_.size()) { return false; }
-  if (!permuted_eq(variable_lower_bounds_.begin(),
-                   variable_lower_bounds_.end(),
-                   other.variable_lower_bounds_.begin(),
-                   d_var_perm.begin())) {
-    return false;
-  }
-  if (variable_upper_bounds_.size() != other.variable_upper_bounds_.size()) { return false; }
-  if (!permuted_eq(variable_upper_bounds_.begin(),
-                   variable_upper_bounds_.end(),
-                   other.variable_upper_bounds_.begin(),
-                   d_var_perm.begin())) {
-    return false;
-  }
-  if (variable_types_.size() != other.variable_types_.size()) { return false; }
-  if (!permuted_eq(variable_types_.begin(),
-                   variable_types_.end(),
-                   other.variable_types_.begin(),
-                   d_var_perm.begin())) {
-    return false;
-  }
-
-  // Compare constraint-indexed arrays
-  if (b_.size() != other.b_.size()) { return false; }
-  if (!permuted_eq(b_.begin(), b_.end(), other.b_.begin(), d_row_perm.begin())) { return false; }
-  if (constraint_lower_bounds_.size() != other.constraint_lower_bounds_.size()) { return false; }
-  if (!permuted_eq(constraint_lower_bounds_.begin(),
-                   constraint_lower_bounds_.end(),
-                   other.constraint_lower_bounds_.begin(),
-                   d_row_perm.begin())) {
-    return false;
-  }
-  if (constraint_upper_bounds_.size() != other.constraint_upper_bounds_.size()) { return false; }
-  if (!permuted_eq(constraint_upper_bounds_.begin(),
-                   constraint_upper_bounds_.end(),
-                   other.constraint_upper_bounds_.begin(),
-                   d_row_perm.begin())) {
-    return false;
-  }
-  if (row_types_.size() != other.row_types_.size()) { return false; }
-  if (!permuted_eq(
-        row_types_.begin(), row_types_.end(), other.row_types_.begin(), d_row_perm.begin())) {
-    return false;
-  }
-
-  // Build inverse permutations on CPU (needed for CSR comparisons)
-  std::vector<i_t> var_perm_inv(n_vars_);
-  for (i_t i = 0; i < n_vars_; ++i) {
-    var_perm_inv[var_perm[i]] = i;
-  }
-  std::vector<i_t> row_perm_inv(n_constraints_);
-  for (i_t i = 0; i < n_constraints_; ++i) {
-    row_perm_inv[row_perm[i]] = i;
-  }
-
-  // Upload inverse permutations to GPU
-  rmm::device_uvector<i_t> d_var_perm_inv(n_vars_, stream_view_);
-  rmm::device_uvector<i_t> d_row_perm_inv(n_constraints_, stream_view_);
-  raft::copy(d_var_perm_inv.data(), var_perm_inv.data(), n_vars_, stream_view_);
-  raft::copy(d_row_perm_inv.data(), row_perm_inv.data(), n_constraints_, stream_view_);
-
-  // Constraint matrix (A) comparison with row and column permutations
-  if (!csr_matrices_equivalent_with_permutation(A_offsets_,
-                                                A_indices_,
-                                                A_,
-                                                other.A_offsets_,
-                                                other.A_indices_,
-                                                other.A_,
-                                                d_row_perm_inv,
-                                                d_var_perm_inv,
-                                                n_vars_,
-                                                stream_view_)) {
-    return false;
-  }
-
-  // Q matrix writing to MPS not supported yet. Don't check for equivalence here
-
-  return true;
+  maximize_ = maximize;
 }
 
 template <typename i_t, typename f_t>
@@ -362,6 +133,7 @@ void optimization_problem_t<i_t, f_t>::set_csr_constraint_matrix(const f_t* A_va
   cuopt_expects(A_offsets != nullptr, error_type_t::ValidationError, "A_offsets cannot be null");
   A_offsets_.resize(size_offsets, stream_view_);
   raft::copy(A_offsets_.data(), A_offsets, size_offsets, stream_view_);
+  n_constraints_ = size_offsets == 0 ? 0 : size_offsets - 1;
 }
 
 template <typename i_t, typename f_t>
@@ -377,8 +149,8 @@ template <typename i_t, typename f_t>
 void optimization_problem_t<i_t, f_t>::set_objective_coefficients(const f_t* c, i_t size)
 {
   cuopt_expects(c != nullptr, error_type_t::ValidationError, "c cannot be null");
-  c_.resize(size, stream_view_);
   n_vars_ = size;
+  c_.resize(size, stream_view_);
   raft::copy(c_.data(), c, size, stream_view_);
 }
 
@@ -412,9 +184,9 @@ void optimization_problem_t<i_t, f_t>::set_quadratic_objective_matrix(
     cuopt_expects(Q_indices != nullptr, error_type_t::ValidationError, "Q_indices cannot be null");
   }
 
-  if (size_offsets != 0) {
-    cuopt_expects(Q_offsets != nullptr, error_type_t::ValidationError, "Q_offsets cannot be null");
-  }
+  cuopt_expects(
+    size_offsets >= 1, error_type_t::ValidationError, "Q_offsets must have at least 1 element");
+  cuopt_expects(Q_offsets != nullptr, error_type_t::ValidationError, "Q_offsets cannot be null");
 
   // Replace Q with Q + Q^T
   i_t qn    = size_offsets - 1;  // Number of variables
@@ -528,6 +300,34 @@ void optimization_problem_t<i_t, f_t>::set_variable_upper_bounds(const f_t* vari
   raft::copy(variable_upper_bounds_.data(), variable_upper_bounds, size, stream_view_);
 }
 
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_variable_types(const var_t* variable_types, i_t size)
+{
+  cuopt_expects(
+    variable_types != nullptr, error_type_t::ValidationError, "variable_types cannot be null");
+  variable_types_.resize(size, stream_view_);
+  raft::copy(variable_types_.data(), variable_types, size, stream_view_);
+
+  // Auto-detect problem category; test for zero integers first so size == 0 maps to LP, not IP
+  i_t n_integer = thrust::count_if(handle_ptr_->get_thrust_policy(),
+                                   variable_types_.begin(),
+                                   variable_types_.end(),
+                                   [] __device__(auto val) { return val == var_t::INTEGER; });
+  if (n_integer == 0) {
+    problem_category_ = problem_category_t::LP;
+  } else if (n_integer == size) {
+    problem_category_ = problem_category_t::IP;
+  } else {
+    problem_category_ = problem_category_t::MIP;
+  }
+}
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_problem_category(const problem_category_t& category)
+{
+  problem_category_ = category;
+}
+
 template <typename i_t, typename f_t>
 void optimization_problem_t<i_t, f_t>::set_constraint_lower_bounds(
   const f_t* constraint_lower_bounds, i_t size)
@@ -565,57 +365,34 @@ void optimization_problem_t<i_t, f_t>::set_row_types(const char* row_types, i_t
   raft::copy(row_types_.data(), row_types, size, stream_view_);
 }
 
-template <typename i_t, typename f_t>
-void optimization_problem_t<i_t, f_t>::set_variable_types(const var_t* var_types, i_t size)
-{
-  cuopt_expects(var_types != nullptr, error_type_t::ValidationError, "var_types cannot be null");
-  variable_types_.resize(size, stream_view_);
-  raft::copy(variable_types_.data(), var_types, size, stream_view_);
-  // TODO when having a unified problem representation
-  // compute this in a single places (currently also in problem.cu)
-  i_t n_integer = thrust::count_if(handle_ptr_->get_thrust_policy(),
-                                   variable_types_.begin(),
-                                   variable_types_.end(),
-                                   [] __device__(auto val) { return val == var_t::INTEGER; });
-  // by default it is LP
-  if (n_integer == size) {
-    problem_category_ = problem_category_t::IP;
-  } else if (n_integer > 0) {
-    problem_category_ = problem_category_t::MIP;
-  }
-}
-
-template <typename i_t, typename f_t>
-void optimization_problem_t<i_t, f_t>::set_problem_category(const problem_category_t& category)
-{
-  problem_category_ = category;
-}
-
 template <typename i_t, typename f_t>
 void optimization_problem_t<i_t, f_t>::set_objective_name(const std::string& objective_name)
 {
   objective_name_ = objective_name;
 }
+
 template <typename i_t, typename f_t>
 void optimization_problem_t<i_t, f_t>::set_problem_name(const std::string& problem_name)
 {
   problem_name_ = problem_name;
 }
+
 template <typename i_t, typename f_t>
 void optimization_problem_t<i_t, f_t>::set_variable_names(
   const std::vector<std::string>& variable_names)
 {
   var_names_ = variable_names;
 }
+
 template <typename i_t, typename f_t>
 void optimization_problem_t<i_t, f_t>::set_row_names(const std::vector<std::string>& row_names)
 {
   row_names_ = row_names;
 }
 
-// ============================================================================
-// Getters
-// ============================================================================
+// ==============================================================================
+// Device Getters
+// ==============================================================================
 
 template <typename i_t, typename f_t>
 i_t optimization_problem_t<i_t, f_t>::get_n_variables() const
@@ -639,17 +416,8 @@ template <typename i_t, typename f_t>
 i_t optimization_problem_t<i_t, f_t>::get_n_integers() const
 {
   if (variable_types_.size() == 0) return 0;
-
-  return thrust::count(rmm::exec_policy(handle_ptr_->get_stream()),
-                       variable_types_.begin(),
-                       variable_types_.end(),
-                       var_t::INTEGER);
-}
-
-template <typename i_t, typename f_t>
-raft::handle_t const* optimization_problem_t<i_t, f_t>::get_handle_ptr() const noexcept
-{
-  return handle_ptr_;
+  return thrust::count(
+    rmm::exec_policy(stream_view_), variable_types_.begin(), variable_types_.end(), var_t::INTEGER);
 }
 
 template <typename i_t, typename f_t>
@@ -728,25 +496,13 @@ f_t optimization_problem_t<i_t, f_t>::get_objective_offset() const
 }
 
 template <typename i_t, typename f_t>
-const std::vector<f_t>& optimization_problem_t<i_t, f_t>::get_quadratic_objective_values() const
-{
-  return Q_values_;
-}
-
-template <typename i_t, typename f_t>
-const std::vector<i_t>& optimization_problem_t<i_t, f_t>::get_quadratic_objective_indices() const
-{
-  return Q_indices_;
-}
-
-template <typename i_t, typename f_t>
-const std::vector<i_t>& optimization_problem_t<i_t, f_t>::get_quadratic_objective_offsets() const
+const rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_variable_lower_bounds() const
 {
-  return Q_offsets_;
+  return variable_lower_bounds_;
 }
 
 template <typename i_t, typename f_t>
-const rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_variable_lower_bounds() const
+rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_variable_lower_bounds()
 {
   return variable_lower_bounds_;
 }
@@ -757,22 +513,11 @@ const rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_variable_u
   return variable_upper_bounds_;
 }
 
-template <typename i_t, typename f_t>
-rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_variable_lower_bounds()
-{
-  return variable_lower_bounds_;
-}
-
 template <typename i_t, typename f_t>
 rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_variable_upper_bounds()
 {
   return variable_upper_bounds_;
 }
-template <typename i_t, typename f_t>
-const rmm::device_uvector<var_t>& optimization_problem_t<i_t, f_t>::get_variable_types() const
-{
-  return variable_types_;
-}
 
 template <typename i_t, typename f_t>
 const rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds()
@@ -782,16 +527,16 @@ const rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_constraint
 }
 
 template <typename i_t, typename f_t>
-const rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds()
-  const
+rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds()
 {
-  return constraint_upper_bounds_;
+  return constraint_lower_bounds_;
 }
 
 template <typename i_t, typename f_t>
-rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds()
+const rmm::device_uvector<f_t>& optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds()
+  const
 {
-  return constraint_lower_bounds_;
+  return constraint_upper_bounds_;
 }
 
 template <typename i_t, typename f_t>
@@ -806,6 +551,24 @@ const rmm::device_uvector<char>& optimization_problem_t<i_t, f_t>::get_row_types
   return row_types_;
 }
 
+template <typename i_t, typename f_t>
+const rmm::device_uvector<var_t>& optimization_problem_t<i_t, f_t>::get_variable_types() const
+{
+  return variable_types_;
+}
+
+template <typename i_t, typename f_t>
+bool optimization_problem_t<i_t, f_t>::get_sense() const
+{
+  return maximize_;
+}
+
+template <typename i_t, typename f_t>
+bool optimization_problem_t<i_t, f_t>::empty() const
+{
+  return n_vars_ == 0 || n_constraints_ == 0;
+}
+
 template <typename i_t, typename f_t>
 std::string optimization_problem_t<i_t, f_t>::get_objective_name() const
 {
@@ -837,17 +600,162 @@ const std::vector<std::string>& optimization_problem_t<i_t, f_t>::get_row_names(
 }
 
 template <typename i_t, typename f_t>
-bool optimization_problem_t<i_t, f_t>::get_sense() const
+const std::vector<i_t>& optimization_problem_t<i_t, f_t>::get_quadratic_objective_offsets() const
 {
-  return maximize_;
+  return Q_offsets_;
 }
 
 template <typename i_t, typename f_t>
-bool optimization_problem_t<i_t, f_t>::empty() const
+const std::vector<i_t>& optimization_problem_t<i_t, f_t>::get_quadratic_objective_indices() const
+{
+  return Q_indices_;
+}
+
+template <typename i_t, typename f_t>
+const std::vector<f_t>& optimization_problem_t<i_t, f_t>::get_quadratic_objective_values() const
+{
+  return Q_values_;
+}
+
+template <typename i_t, typename f_t>
+bool optimization_problem_t<i_t, f_t>::has_quadratic_objective() const
+{
+  return !Q_values_.empty();
+}
+
+template <typename i_t, typename f_t>
+raft::handle_t const* optimization_problem_t<i_t, f_t>::get_handle_ptr() const noexcept
+{
+  return handle_ptr_;
+}
+
+// ==============================================================================
+// Conversion
+// ==============================================================================
+
+template <typename i_t, typename f_t>
+std::unique_ptr<optimization_problem_t<i_t, f_t>>
+optimization_problem_t<i_t, f_t>::to_optimization_problem(raft::handle_t const* /*handle_ptr*/)
+{
+  // Already a GPU problem: nothing is converted and the handle is unused; callers get nullptr
+  return nullptr;
+}
+
+// ==============================================================================
+// Host Getters (copy from GPU to CPU)
+// ==============================================================================
+
+template <typename i_t, typename f_t>
+std::vector<f_t> optimization_problem_t<i_t, f_t>::get_constraint_matrix_values_host() const
+{
+  std::vector<f_t> host_data(A_.size());
+  raft::copy(host_data.data(), A_.data(), A_.size(), stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+template <typename i_t, typename f_t>
+std::vector<i_t> optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices_host() const
+{
+  std::vector<i_t> host_data(A_indices_.size());
+  raft::copy(host_data.data(), A_indices_.data(), A_indices_.size(), stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+template <typename i_t, typename f_t>
+std::vector<i_t> optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets_host() const
+{
+  std::vector<i_t> host_data(A_offsets_.size());
+  raft::copy(host_data.data(), A_offsets_.data(), A_offsets_.size(), stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+template <typename i_t, typename f_t>
+std::vector<f_t> optimization_problem_t<i_t, f_t>::get_constraint_bounds_host() const
+{
+  std::vector<f_t> host_data(b_.size());
+  raft::copy(host_data.data(), b_.data(), b_.size(), stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+template <typename i_t, typename f_t>
+std::vector<f_t> optimization_problem_t<i_t, f_t>::get_objective_coefficients_host() const
 {
-  return n_vars_ == 0 && n_constraints_ == 0;
+  std::vector<f_t> host_data(c_.size());
+  raft::copy(host_data.data(), c_.data(), c_.size(), stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
 }
 
+template <typename i_t, typename f_t>
+std::vector<f_t> optimization_problem_t<i_t, f_t>::get_variable_lower_bounds_host() const
+{
+  std::vector<f_t> host_data(variable_lower_bounds_.size());
+  raft::copy(
+    host_data.data(), variable_lower_bounds_.data(), variable_lower_bounds_.size(), stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+template <typename i_t, typename f_t>
+std::vector<f_t> optimization_problem_t<i_t, f_t>::get_variable_upper_bounds_host() const
+{
+  std::vector<f_t> host_data(variable_upper_bounds_.size());
+  raft::copy(
+    host_data.data(), variable_upper_bounds_.data(), variable_upper_bounds_.size(), stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+template <typename i_t, typename f_t>
+std::vector<f_t> optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds_host() const
+{
+  std::vector<f_t> host_data(constraint_lower_bounds_.size());
+  raft::copy(host_data.data(),
+             constraint_lower_bounds_.data(),
+             constraint_lower_bounds_.size(),
+             stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+template <typename i_t, typename f_t>
+std::vector<f_t> optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds_host() const
+{
+  std::vector<f_t> host_data(constraint_upper_bounds_.size());
+  raft::copy(host_data.data(),
+             constraint_upper_bounds_.data(),
+             constraint_upper_bounds_.size(),
+             stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+template <typename i_t, typename f_t>
+std::vector<char> optimization_problem_t<i_t, f_t>::get_row_types_host() const
+{
+  std::vector<char> host_data(row_types_.size());
+  raft::copy(host_data.data(), row_types_.data(), row_types_.size(), stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+template <typename i_t, typename f_t>
+std::vector<var_t> optimization_problem_t<i_t, f_t>::get_variable_types_host() const
+{
+  std::vector<var_t> host_data(variable_types_.size());
+  raft::copy(host_data.data(), variable_types_.data(), variable_types_.size(), stream_view_);
+  handle_ptr_->sync_stream();
+  return host_data;
+}
+
+// ==============================================================================
+// View
+// ==============================================================================
+
 template <typename i_t, typename f_t>
 typename optimization_problem_t<i_t, f_t>::view_t optimization_problem_t<i_t, f_t>::view() const
 {
@@ -863,12 +771,14 @@ typename optimization_problem_t<i_t, f_t>::view_t optimization_problem_t<i_t, f_
                                                  get_constraint_matrix_offsets().size()};
   v.b =
     raft::device_span<const f_t>{get_constraint_bounds().data(), get_constraint_bounds().size()};
-  v.c                       = raft::device_span<const f_t>{get_objective_coefficients().data(),
-                                                           get_objective_coefficients().size()};
-  v.variable_lower_bounds   = raft::device_span<const f_t>{get_variable_lower_bounds().data(),
-                                                           get_variable_lower_bounds().size()};
-  v.variable_upper_bounds   = raft::device_span<const f_t>{get_variable_upper_bounds().data(),
-                                                           get_variable_upper_bounds().size()};
+  v.c                     = raft::device_span<const f_t>{get_objective_coefficients().data(),
+                                                         get_objective_coefficients().size()};
+  v.variable_lower_bounds = raft::device_span<const f_t>{get_variable_lower_bounds().data(),
+                                                         get_variable_lower_bounds().size()};
+  v.variable_upper_bounds = raft::device_span<const f_t>{get_variable_upper_bounds().data(),
+                                                         get_variable_upper_bounds().size()};
+  v.variable_types =
+    raft::device_span<const var_t>{get_variable_types().data(), get_variable_types().size()};
   v.constraint_lower_bounds = raft::device_span<const f_t>{get_constraint_lower_bounds().data(),
                                                            get_constraint_lower_bounds().size()};
   v.constraint_upper_bounds = raft::device_span<const f_t>{get_constraint_upper_bounds().data(),
@@ -876,11 +786,9 @@ typename optimization_problem_t<i_t, f_t>::view_t optimization_problem_t<i_t, f_
   return v;
 }
 
-template <typename i_t, typename f_t>
-void optimization_problem_t<i_t, f_t>::set_maximize(bool _maximize)
-{
-  maximize_ = _maximize;
-}
+// ==============================================================================
+// File I/O
+// ==============================================================================
 
 template <typename i_t, typename f_t>
 void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_path)
@@ -903,8 +811,9 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
   auto constraint_upper_bounds   = cuopt::host_copy(get_constraint_upper_bounds(), stream);
   auto row_types                 = cuopt::host_copy(get_row_types(), stream);
 
-  // Set constraint matrix in CSR format
-  if (get_nnz() != 0) {
+  // Set constraint matrix in CSR format (guard on offsets, not nnz, to preserve
+  // zero-nnz but structurally valid matrices)
+  if (!constraint_matrix_offsets.empty()) {
     data_model_view.set_csr_constraint_matrix(constraint_matrix_values.data(),
                                               constraint_matrix_values.size(),
                                               constraint_matrix_indices.data(),
@@ -932,6 +841,8 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
   if (get_n_variables() != 0) {
     data_model_view.set_variable_lower_bounds(variable_lower_bounds.data(),
                                               variable_lower_bounds.size());
+  }
+  if (!variable_upper_bounds.empty()) {
     data_model_view.set_variable_upper_bounds(variable_upper_bounds.data(),
                                               variable_upper_bounds.size());
   }
@@ -941,19 +852,28 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
     data_model_view.set_row_types(row_types.data(), row_types.size());
   }
 
-  // Set constraint bounds (lower and upper)
-  if (get_constraint_lower_bounds().size() != 0 && get_constraint_upper_bounds().size() != 0) {
+  if (!constraint_lower_bounds.empty()) {
     data_model_view.set_constraint_lower_bounds(constraint_lower_bounds.data(),
                                                 constraint_lower_bounds.size());
+  }
+  if (!constraint_upper_bounds.empty()) {
     data_model_view.set_constraint_upper_bounds(constraint_upper_bounds.data(),
                                                 constraint_upper_bounds.size());
   }
 
-  // Create a temporary vector to hold the converted variable types
-  std::vector<char> variable_types(get_n_variables());
-  // Set variable types (convert from enum to char)
+  // Set problem and variable names FIRST (before variable types)
+  if (!get_problem_name().empty()) { data_model_view.set_problem_name(get_problem_name()); }
+  if (!get_objective_name().empty()) { data_model_view.set_objective_name(get_objective_name()); }
+  if (!get_variable_names().empty()) { data_model_view.set_variable_names(get_variable_names()); }
+  if (!get_row_names().empty()) { data_model_view.set_row_names(get_row_names()); }
+
+  // Set variable types AFTER names (convert from enum to char)
+  // CRITICAL: Declare variable_types OUTSIDE the if block so it stays alive
+  // until after write_mps() is called, since data_model_view stores a span (pointer) to it
+  std::vector<char> variable_types;
   if (get_n_variables() != 0) {
     auto enum_variable_types = cuopt::host_copy(get_variable_types(), stream);
+    variable_types.resize(enum_variable_types.size());
 
     // Convert enum types to char types
     for (size_t i = 0; i < variable_types.size(); ++i) {
@@ -963,15 +883,6 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
     data_model_view.set_variable_types(variable_types.data(), variable_types.size());
   }
 
-  // Set problem and variable names if available
-  if (!get_problem_name().empty()) { data_model_view.set_problem_name(get_problem_name()); }
-
-  if (!get_objective_name().empty()) { data_model_view.set_objective_name(get_objective_name()); }
-
-  if (!get_variable_names().empty()) { data_model_view.set_variable_names(get_variable_names()); }
-
-  if (!get_row_names().empty()) { data_model_view.set_row_names(get_row_names()); }
-
   cuopt::mps_parser::write_mps(data_model_view, mps_file_path);
 }
 
@@ -1057,20 +968,552 @@ void optimization_problem_t<i_t, f_t>::print_scaling_information() const
   CUOPT_LOG_INFO("");
 }
 
+// ==============================================================================
+// Comparison
+// ==============================================================================
+
+/**
+ * @brief Compare two CSR matrices for equivalence under row and column permutations.
+ *
+ * @param this_offsets Row offsets of first matrix
+ * @param this_indices Column indices of first matrix
+ * @param this_values Values of first matrix
+ * @param other_offsets Row offsets of second matrix
+ * @param other_indices Column indices of second matrix
+ * @param other_values Values of second matrix
+ * @param d_row_perm_inv Inverse row permutation (maps other's row indices to this's)
+ * @param d_col_perm_inv Inverse column permutation (maps other's col indices to this's)
+ * @param n_cols Number of columns; entries are ordered by the key row * n_cols + col
+ * @param stream CUDA stream
+ * @return true if the non-zero counts match and the remapped entries are exactly equal
+ */
 template <typename i_t, typename f_t>
-bool optimization_problem_t<i_t, f_t>::has_quadratic_objective() const
+static bool csr_matrices_equivalent_with_permutation(const rmm::device_uvector<i_t>& this_offsets,
+                                                     const rmm::device_uvector<i_t>& this_indices,
+                                                     const rmm::device_uvector<f_t>& this_values,
+                                                     const rmm::device_uvector<i_t>& other_offsets,
+                                                     const rmm::device_uvector<i_t>& other_indices,
+                                                     const rmm::device_uvector<f_t>& other_values,
+                                                     const rmm::device_uvector<i_t>& d_row_perm_inv,
+                                                     const rmm::device_uvector<i_t>& d_col_perm_inv,
+                                                     i_t n_cols,
+                                                     rmm::cuda_stream_view stream)
 {
-  return !Q_values_.empty();
-}
-// NOTE: Explicitly instantiate all types here in order to avoid linker error
-#if MIP_INSTANTIATE_FLOAT
-template class optimization_problem_t<int, float>;
-#endif
-#if MIP_INSTANTIATE_DOUBLE
-template class optimization_problem_t<int, double>;
-#endif
+  const i_t nnz = static_cast<i_t>(this_values.size());
+  if (nnz != static_cast<i_t>(other_values.size())) { return false; }
+  if (nnz == 0) { return true; }  // two empty matrices are trivially equivalent
+
+  auto policy = rmm::exec_policy(stream);
+
+  // Expand CSR row offsets to row indices for 'this'
+  rmm::device_uvector<i_t> this_rows(nnz, stream);
+  rmm::device_uvector<i_t> this_cols(nnz, stream);
+  rmm::device_uvector<f_t> this_vals(nnz, stream);
 
-// TODO current raft to cusparse wrappers only support int64_t
-// can be CUSPARSE_INDEX_16U, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_64I
+  // upper_bound returns 1-based indices; convert to 0-based
+  thrust::upper_bound(policy,
+                      this_offsets.begin(),
+                      this_offsets.end(),
+                      thrust::make_counting_iterator<i_t>(0),
+                      thrust::make_counting_iterator<i_t>(nnz),
+                      this_rows.begin());
+  thrust::transform(
+    policy, this_rows.begin(), this_rows.end(), this_rows.begin(), [] __device__(i_t r) {
+      return r - 1;
+    });
+
+  thrust::copy(policy, this_indices.begin(), this_indices.end(), this_cols.begin());
+  thrust::copy(policy, this_values.begin(), this_values.end(), this_vals.begin());
+
+  // For 'other': expand and apply inverse permutations to map to 'this' coordinate system
+  rmm::device_uvector<i_t> other_rows(nnz, stream);
+  rmm::device_uvector<i_t> other_cols(nnz, stream);
+  rmm::device_uvector<f_t> other_vals(nnz, stream);
+
+  thrust::upper_bound(policy,
+                      other_offsets.begin(),
+                      other_offsets.end(),
+                      thrust::make_counting_iterator<i_t>(0),
+                      thrust::make_counting_iterator<i_t>(nnz),
+                      other_rows.begin());
+  // Convert to 0-based and remap to this's row numbering in one pass. thrust::gather requires
+  // that its map and result ranges do not overlap, so other_rows is not gathered in place.
+  const i_t* row_inv = d_row_perm_inv.data();
+  thrust::transform(
+    policy, other_rows.begin(), other_rows.end(), other_rows.begin(), [row_inv] __device__(i_t r) {
+      return row_inv[r - 1];
+    });
+
+  thrust::gather(
+    policy, other_indices.begin(), other_indices.end(), d_col_perm_inv.begin(), other_cols.begin());
+
+  thrust::copy(policy, other_values.begin(), other_values.end(), other_vals.begin());
+
+  // Create sort keys: row * n_cols + col (to sort by row then column)
+  rmm::device_uvector<int64_t> this_keys(nnz, stream);
+  rmm::device_uvector<int64_t> other_keys(nnz, stream);
+
+  const int64_t n_cols_64 = n_cols;  // 64-bit so that row * n_cols cannot overflow i_t
+  thrust::transform(policy,
+                    thrust::make_zip_iterator(this_rows.begin(), this_cols.begin()),
+                    thrust::make_zip_iterator(this_rows.end(), this_cols.end()),
+                    this_keys.begin(),
+                    [n_cols_64] __device__(thrust::tuple<i_t, i_t> rc) {
+                      return static_cast<int64_t>(thrust::get<0>(rc)) * n_cols_64 +
+                             static_cast<int64_t>(thrust::get<1>(rc));
+                    });
+
+  thrust::transform(policy,
+                    thrust::make_zip_iterator(other_rows.begin(), other_cols.begin()),
+                    thrust::make_zip_iterator(other_rows.end(), other_cols.end()),
+                    other_keys.begin(),
+                    [n_cols_64] __device__(thrust::tuple<i_t, i_t> rc) {
+                      return static_cast<int64_t>(thrust::get<0>(rc)) * n_cols_64 +
+                             static_cast<int64_t>(thrust::get<1>(rc));
+                    });
+
+  thrust::sort_by_key(policy, this_keys.begin(), this_keys.end(), this_vals.begin());
+  thrust::sort_by_key(policy, other_keys.begin(), other_keys.end(), other_vals.begin());
+
+  if (!thrust::equal(policy, this_keys.begin(), this_keys.end(), other_keys.begin())) {
+    return false;
+  }
+
+  if (!thrust::equal(policy, this_vals.begin(), this_vals.end(), other_vals.begin())) {
+    return false;
+  }
+
+  return true;
+}
+
+template <typename i_t, typename f_t>
+bool optimization_problem_t<i_t, f_t>::is_equivalent(
+  const optimization_problem_t<i_t, f_t>& other) const
+{  // Name-based equivalence check: variables/rows are matched by name, data is compared exactly
+  if (maximize_ != other.maximize_) { return false; }
+  if (n_vars_ != other.n_vars_) { return false; }
+  if (n_constraints_ != other.n_constraints_) { return false; }
+  if (objective_scaling_factor_ != other.objective_scaling_factor_) { return false; }
+  if (objective_offset_ != other.objective_offset_) { return false; }
+  if (problem_category_ != other.problem_category_) { return false; }
+  if (A_.size() != other.A_.size()) { return false; }  // same number of stored matrix values
+
+  if (var_names_.size() != static_cast<size_t>(n_vars_) ||
+      other.var_names_.size() != static_cast<size_t>(other.n_vars_)) {
+    return false;  // the matching below needs one name per variable in both problems
+  }
+  if (row_names_.size() != static_cast<size_t>(n_constraints_) ||
+      other.row_names_.size() != static_cast<size_t>(other.n_constraints_)) {
+    return false;  // likewise one name per constraint row
+  }
+
+  // Build variable permutation: var_perm[i] = j such that var_names_[i] == other.var_names_[j].
+  // Returns false as soon as one of this problem's variable names is unknown to other.
+  std::unordered_map<std::string, i_t> other_var_idx;
+  for (size_t j = 0; j < other.var_names_.size(); ++j) {
+    other_var_idx[other.var_names_[j]] = static_cast<i_t>(j);  // a repeated name keeps its last index
+  }
+  std::vector<i_t> var_perm(n_vars_);
+  for (i_t i = 0; i < n_vars_; ++i) {
+    auto it = other_var_idx.find(var_names_[i]);
+    if (it == other_var_idx.end()) { return false; }
+    var_perm[i] = it->second;
+  }
+
+  // Build row permutation: row_perm[i] = j such that row_names_[i] == other.row_names_[j].
+  // Returns false as soon as one of this problem's row names is unknown to other.
+  std::unordered_map<std::string, i_t> other_row_idx;
+  for (size_t j = 0; j < other.row_names_.size(); ++j) {
+    other_row_idx[other.row_names_[j]] = static_cast<i_t>(j);  // a repeated name keeps its last index
+  }
+  std::vector<i_t> row_perm(n_constraints_);
+  for (i_t i = 0; i < n_constraints_; ++i) {
+    auto it = other_row_idx.find(row_names_[i]);
+    if (it == other_row_idx.end()) { return false; }
+    row_perm[i] = it->second;
+  }
+
+  // Upload permutations to GPU
+  rmm::device_uvector<i_t> d_var_perm(n_vars_, stream_view_);
+  rmm::device_uvector<i_t> d_row_perm(n_constraints_, stream_view_);
+  raft::copy(d_var_perm.data(), var_perm.data(), n_vars_, stream_view_);
+  raft::copy(d_row_perm.data(), row_perm.data(), n_constraints_, stream_view_);
+
+  auto policy = rmm::exec_policy(stream_view_);
+
+  auto permuted_eq = [&](auto this_begin, auto this_end, auto other_begin, auto perm_begin) {
+    auto other_perm = thrust::make_permutation_iterator(other_begin, perm_begin);
+    return thrust::equal(policy, this_begin, this_end, other_perm);  // this[i] == other[perm[i]]
+  };
+
+  // Compare variable-indexed arrays (sizes first, then contents under var_perm)
+  if (c_.size() != other.c_.size()) { return false; }
+  if (!permuted_eq(c_.begin(), c_.end(), other.c_.begin(), d_var_perm.begin())) { return false; }
+  if (variable_lower_bounds_.size() != other.variable_lower_bounds_.size()) { return false; }
+  if (!permuted_eq(variable_lower_bounds_.begin(),
+                   variable_lower_bounds_.end(),
+                   other.variable_lower_bounds_.begin(),
+                   d_var_perm.begin())) {
+    return false;
+  }
+  if (variable_upper_bounds_.size() != other.variable_upper_bounds_.size()) { return false; }
+  if (!permuted_eq(variable_upper_bounds_.begin(),
+                   variable_upper_bounds_.end(),
+                   other.variable_upper_bounds_.begin(),
+                   d_var_perm.begin())) {
+    return false;
+  }
+  if (variable_types_.size() != other.variable_types_.size()) { return false; }
+  if (!permuted_eq(variable_types_.begin(),
+                   variable_types_.end(),
+                   other.variable_types_.begin(),
+                   d_var_perm.begin())) {
+    return false;
+  }
+
+  // Compare constraint-indexed arrays (sizes first, then contents under row_perm)
+  if (b_.size() != other.b_.size()) { return false; }
+  if (!permuted_eq(b_.begin(), b_.end(), other.b_.begin(), d_row_perm.begin())) { return false; }
+  if (constraint_lower_bounds_.size() != other.constraint_lower_bounds_.size()) { return false; }
+  if (!permuted_eq(constraint_lower_bounds_.begin(),
+                   constraint_lower_bounds_.end(),
+                   other.constraint_lower_bounds_.begin(),
+                   d_row_perm.begin())) {
+    return false;
+  }
+  if (constraint_upper_bounds_.size() != other.constraint_upper_bounds_.size()) { return false; }
+  if (!permuted_eq(constraint_upper_bounds_.begin(),
+                   constraint_upper_bounds_.end(),
+                   other.constraint_upper_bounds_.begin(),
+                   d_row_perm.begin())) {
+    return false;
+  }
+  if (row_types_.size() != other.row_types_.size()) { return false; }
+  if (!permuted_eq(
+        row_types_.begin(), row_types_.end(), other.row_types_.begin(), d_row_perm.begin())) {
+    return false;
+  }
+
+  // Build inverse permutations on CPU (needed for CSR comparisons): inv[perm[i]] = i
+  std::vector<i_t> var_perm_inv(n_vars_);
+  for (i_t i = 0; i < n_vars_; ++i) {
+    var_perm_inv[var_perm[i]] = i;
+  }
+  std::vector<i_t> row_perm_inv(n_constraints_);
+  for (i_t i = 0; i < n_constraints_; ++i) {
+    row_perm_inv[row_perm[i]] = i;
+  }
+
+  // Upload inverse permutations to GPU
+  rmm::device_uvector<i_t> d_var_perm_inv(n_vars_, stream_view_);
+  rmm::device_uvector<i_t> d_row_perm_inv(n_constraints_, stream_view_);
+  raft::copy(d_var_perm_inv.data(), var_perm_inv.data(), n_vars_, stream_view_);
+  raft::copy(d_row_perm_inv.data(), row_perm_inv.data(), n_constraints_, stream_view_);
+
+  // Constraint matrix (A) comparison with row and column permutations
+  if (!csr_matrices_equivalent_with_permutation(A_offsets_,
+                                                A_indices_,
+                                                A_,
+                                                other.A_offsets_,
+                                                other.A_indices_,
+                                                other.A_,
+                                                d_row_perm_inv,
+                                                d_var_perm_inv,
+                                                n_vars_,
+                                                stream_view_)) {
+    return false;
+  }
+
+  // The Q matrix is not compared here: writing it to MPS is not supported yet.
+
+  return true;
+}
+
+/**
+ * @brief Host-side equivalence check against any optimization_problem_interface_t.
+ *
+ * Scalar properties must agree (objective scaling factor and offset within 1e-9). The
+ * per-variable arrays, per-constraint arrays and the CSR constraint matrix are then compared
+ * on host copies. When both problems carry exactly one name per variable and per row,
+ * entries are matched by name so that a different variable/row ordering is tolerated;
+ * otherwise both problems are compared in storage order.
+ *
+ * Arrays are compared over the entries they actually hold: an optional array that is empty
+ * on both sides is skipped rather than indexed up to the problem dimension.
+ *
+ * @note Only the linear part of the problem is inspected; no quadratic objective data is read.
+ * @note If a name occurs several times in @p other, the last occurrence is the one matched.
+ * @note NaN entries never register as a difference (std::abs(NaN) > 1e-9 is false).
+ *
+ * @param other Problem to compare against
+ * @return true if no difference was found, false otherwise
+ */
+template <typename i_t, typename f_t>
+bool optimization_problem_t<i_t, f_t>::is_equivalent(
+  const optimization_problem_interface_t<i_t, f_t>& other) const
+{
+  // Compare scalar properties
+  if (maximize_ != other.get_sense()) return false;
+  if (n_vars_ != other.get_n_variables()) return false;
+  if (n_constraints_ != other.get_n_constraints()) return false;
+  if (std::abs(objective_scaling_factor_ - other.get_objective_scaling_factor()) > 1e-9)
+    return false;
+  if (std::abs(objective_offset_ - other.get_objective_offset()) > 1e-9) return false;
+  if (problem_category_ != other.get_problem_category()) return false;
+
+  // Entry-wise comparison of two host arrays. With perm == nullptr, a[i] is compared with
+  // b[i]; otherwise with b[(*perm)[i]]. Arrays of different length never match. Every index
+  // is bounds-checked, so an array shorter than the problem dimension is never read out of
+  // range.
+  auto arrays_match =
+    [](const auto& a, const auto& b, const std::vector<i_t>* perm, auto mismatch) {
+      if (a.size() != b.size()) return false;
+      for (size_t i = 0; i < a.size(); ++i) {
+        if (perm != nullptr && i >= perm->size()) return false;
+        const size_t j = (perm != nullptr) ? static_cast<size_t>((*perm)[i]) : i;
+        if (j >= b.size() || mismatch(a[i], b[j])) return false;
+      }
+      return true;
+    };
+  // Floating-point entries are compared with an absolute tolerance of 1e-9, all other
+  // entries (variable types, row types, CSR indices/offsets) exactly.
+  auto not_close = [](f_t x, f_t y) { return std::abs(x - y) > 1e-9; };
+  auto not_same  = [](auto x, auto y) { return x != y; };
+
+  // Names are usable only if every variable and every row has exactly one in both problems.
+  // Anything else (no names, or a name list whose length does not match the problem
+  // dimension) falls back to the storage-order comparison.
+  const auto& this_var_names  = get_variable_names();
+  const auto& other_var_names = other.get_variable_names();
+  const auto& this_row_names  = get_row_names();
+  const auto& other_row_names = other.get_row_names();
+  const size_t n_vars_sz      = static_cast<size_t>(n_vars_);
+  const size_t n_rows_sz      = static_cast<size_t>(n_constraints_);
+  const bool has_names =
+    n_vars_sz != 0 && n_rows_sz != 0 && this_var_names.size() == n_vars_sz &&
+    other_var_names.size() == n_vars_sz && this_row_names.size() == n_rows_sz &&
+    other_row_names.size() == n_rows_sz;
+
+  // Build the name-based permutations: perm[i] = index in `other` of this problem's i-th
+  // name. A name of this problem that `other` does not know makes the problems differ.
+  std::vector<i_t> var_perm;
+  std::vector<i_t> row_perm;
+  if (has_names) {
+    std::unordered_map<std::string, i_t> other_var_idx;
+    for (size_t j = 0; j < other_var_names.size(); ++j) {
+      other_var_idx[other_var_names[j]] = static_cast<i_t>(j);
+    }
+    var_perm.resize(n_vars_sz);
+    for (size_t i = 0; i < n_vars_sz; ++i) {
+      auto it = other_var_idx.find(this_var_names[i]);
+      if (it == other_var_idx.end()) return false;
+      var_perm[i] = it->second;
+    }
+
+    std::unordered_map<std::string, i_t> other_row_idx;
+    for (size_t j = 0; j < other_row_names.size(); ++j) {
+      other_row_idx[other_row_names[j]] = static_cast<i_t>(j);
+    }
+    row_perm.resize(n_rows_sz);
+    for (size_t i = 0; i < n_rows_sz; ++i) {
+      auto it = other_row_idx.find(this_row_names[i]);
+      if (it == other_row_idx.end()) return false;
+      row_perm[i] = it->second;
+    }
+  }
+  // A null map selects the storage-order comparison in arrays_match
+  const std::vector<i_t>* var_map = has_names ? &var_perm : nullptr;
+  const std::vector<i_t>* row_map = has_names ? &row_perm : nullptr;
+
+  // Variable-indexed arrays. A length mismatch between the two problems is rejected inside
+  // arrays_match.
+  auto this_c  = get_objective_coefficients_host();
+  auto other_c = other.get_objective_coefficients_host();
+  if (!arrays_match(this_c, other_c, var_map, not_close)) return false;
+
+  auto this_var_lb  = get_variable_lower_bounds_host();
+  auto other_var_lb = other.get_variable_lower_bounds_host();
+  if (!arrays_match(this_var_lb, other_var_lb, var_map, not_close)) return false;
+
+  auto this_var_ub  = get_variable_upper_bounds_host();
+  auto other_var_ub = other.get_variable_upper_bounds_host();
+  if (!arrays_match(this_var_ub, other_var_ub, var_map, not_close)) return false;
+
+  auto this_var_types  = get_variable_types_host();
+  auto other_var_types = other.get_variable_types_host();
+  if (!arrays_match(this_var_types, other_var_types, var_map, not_same)) return false;
+
+  // Constraint-indexed arrays (checked in the name-matched and the storage-order path)
+  auto this_b  = get_constraint_bounds_host();
+  auto other_b = other.get_constraint_bounds_host();
+  if (!arrays_match(this_b, other_b, row_map, not_close)) return false;
+
+  auto this_clb  = get_constraint_lower_bounds_host();
+  auto other_clb = other.get_constraint_lower_bounds_host();
+  if (!arrays_match(this_clb, other_clb, row_map, not_close)) return false;
+
+  auto this_cub  = get_constraint_upper_bounds_host();
+  auto other_cub = other.get_constraint_upper_bounds_host();
+  if (!arrays_match(this_cub, other_cub, row_map, not_close)) return false;
+
+  auto this_rt  = get_row_types_host();
+  auto other_rt = other.get_row_types_host();
+  if (!arrays_match(this_rt, other_rt, row_map, not_same)) return false;
+
+  // CSR constraint matrix. The value counts are checked up front so that the permuted
+  // comparison below always receives matrices with the same number of non-zeros.
+  auto this_A_values   = get_constraint_matrix_values_host();
+  auto other_A_values  = other.get_constraint_matrix_values_host();
+  auto this_A_indices  = get_constraint_matrix_indices_host();
+  auto other_A_indices = other.get_constraint_matrix_indices_host();
+  auto this_A_offsets  = get_constraint_matrix_offsets_host();
+  auto other_A_offsets = other.get_constraint_matrix_offsets_host();
+  if (this_A_values.size() != other_A_values.size()) return false;
+
+  if (!has_names) {
+    // Storage-order comparison: values within tolerance, sparsity structure exactly
+    return arrays_match(this_A_values, other_A_values, nullptr, not_close) &&
+           arrays_match(this_A_indices, other_A_indices, nullptr, not_same) &&
+           arrays_match(this_A_offsets, other_A_offsets, nullptr, not_same);
+  }
+
+  // Name-matched comparison: the helper receives the forward permutations built above
+  // (this index -> other index) for rows and columns.
+  if (!csr_matrices_equivalent_with_permutation_host(this_A_offsets,
+                                                     this_A_indices,
+                                                     this_A_values,
+                                                     other_A_offsets,
+                                                     other_A_indices,
+                                                     other_A_values,
+                                                     row_perm,
+                                                     var_perm)) {
+    return false;
+  }
+
+  return true;
+}
+
+// ==============================================================================
+// C API Support: Copy to Host (GPU Implementation)
+// ==============================================================================
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::copy_objective_coefficients_to_host(f_t* output,
+                                                                           i_t size) const
+{  // Copies the first `size` objective coefficients (c_) into the caller-provided buffer.
+  cuopt_expects(output != nullptr && size >= 0 && size <= static_cast<i_t>(c_.size()),
+                error_type_t::ValidationError,
+                "copy_objective_coefficients_to_host: null output or invalid size");  // 0 <= size <= c_.size()
+  RAFT_CUDA_TRY(cudaMemcpy(output, c_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
+}
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::copy_constraint_matrix_to_host(
+  f_t* values, i_t* indices, i_t* offsets, i_t num_values, i_t num_indices, i_t num_offsets) const
+{  // Copies A_, A_indices_ and A_offsets_ into three caller-provided buffers, each with its own count.
+  cuopt_expects(values != nullptr && num_values >= 0 && num_values <= static_cast<i_t>(A_.size()),
+                error_type_t::ValidationError,
+                "copy_constraint_matrix_to_host: null values or invalid size");
+  cuopt_expects(
+    indices != nullptr && num_indices >= 0 && num_indices <= static_cast<i_t>(A_indices_.size()),
+    error_type_t::ValidationError,
+    "copy_constraint_matrix_to_host: null indices or invalid size");
+  cuopt_expects(
+    offsets != nullptr && num_offsets >= 0 && num_offsets <= static_cast<i_t>(A_offsets_.size()),
+    error_type_t::ValidationError,
+    "copy_constraint_matrix_to_host: null offsets or invalid size");  // all three checks precede any copy
+  RAFT_CUDA_TRY(cudaMemcpy(values, A_.data(), num_values * sizeof(f_t), cudaMemcpyDeviceToHost));
+  RAFT_CUDA_TRY(
+    cudaMemcpy(indices, A_indices_.data(), num_indices * sizeof(i_t), cudaMemcpyDeviceToHost));
+  RAFT_CUDA_TRY(
+    cudaMemcpy(offsets, A_offsets_.data(), num_offsets * sizeof(i_t), cudaMemcpyDeviceToHost));
+}
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::copy_row_types_to_host(char* output, i_t size) const
+{  // Copies the first `size` entries of row_types_ (one char each) into the caller-provided buffer.
+  cuopt_expects(output != nullptr && size >= 0 && size <= static_cast<i_t>(row_types_.size()),
+                error_type_t::ValidationError,
+                "copy_row_types_to_host: null output or invalid size");  // 0 <= size <= row_types_.size()
+  RAFT_CUDA_TRY(cudaMemcpy(output, row_types_.data(), size * sizeof(char), cudaMemcpyDeviceToHost));
+}
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::copy_constraint_bounds_to_host(f_t* output, i_t size) const
+{  // Copies the first `size` entries of the constraint bounds (b_) into the caller-provided buffer.
+  cuopt_expects(output != nullptr && size >= 0 && size <= static_cast<i_t>(b_.size()),
+                error_type_t::ValidationError,
+                "copy_constraint_bounds_to_host: null output or invalid size");  // 0 <= size <= b_.size()
+  RAFT_CUDA_TRY(cudaMemcpy(output, b_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
+}
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::copy_constraint_lower_bounds_to_host(f_t* output,
+                                                                            i_t size) const
+{  // Copies the first `size` entries of constraint_lower_bounds_ into the caller-provided buffer.
+  cuopt_expects(
+    output != nullptr && size >= 0 && size <= static_cast<i_t>(constraint_lower_bounds_.size()),
+    error_type_t::ValidationError,
+    "copy_constraint_lower_bounds_to_host: null output or invalid size");  // size may not exceed the array
+  RAFT_CUDA_TRY(cudaMemcpy(
+    output, constraint_lower_bounds_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
+}
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::copy_constraint_upper_bounds_to_host(f_t* output,
+                                                                            i_t size) const
+{  // Copies the first `size` entries of constraint_upper_bounds_ into the caller-provided buffer.
+  cuopt_expects(
+    output != nullptr && size >= 0 && size <= static_cast<i_t>(constraint_upper_bounds_.size()),
+    error_type_t::ValidationError,
+    "copy_constraint_upper_bounds_to_host: null output or invalid size");  // size may not exceed the array
+  RAFT_CUDA_TRY(cudaMemcpy(
+    output, constraint_upper_bounds_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
+}
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::copy_variable_lower_bounds_to_host(f_t* output,
+                                                                          i_t size) const
+{  // Copies the first `size` entries of variable_lower_bounds_ into the caller-provided buffer.
+  cuopt_expects(
+    output != nullptr && size >= 0 && size <= static_cast<i_t>(variable_lower_bounds_.size()),
+    error_type_t::ValidationError,
+    "copy_variable_lower_bounds_to_host: null output or invalid size");  // size may not exceed the array
+  RAFT_CUDA_TRY(
+    cudaMemcpy(output, variable_lower_bounds_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
+}
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::copy_variable_upper_bounds_to_host(f_t* output,
+                                                                          i_t size) const
+{  // Copies the first `size` entries of variable_upper_bounds_ into the caller-provided buffer.
+  cuopt_expects(
+    output != nullptr && size >= 0 && size <= static_cast<i_t>(variable_upper_bounds_.size()),
+    error_type_t::ValidationError,
+    "copy_variable_upper_bounds_to_host: null output or invalid size");  // size may not exceed the array
+  RAFT_CUDA_TRY(
+    cudaMemcpy(output, variable_upper_bounds_.data(), size * sizeof(f_t), cudaMemcpyDeviceToHost));
+}
+
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::copy_variable_types_to_host(var_t* output, i_t size) const
+{  // Copies the first `size` entries of variable_types_ (var_t values) into the caller-provided buffer.
+  cuopt_expects(output != nullptr && size >= 0 && size <= static_cast<i_t>(variable_types_.size()),
+                error_type_t::ValidationError,
+                "copy_variable_types_to_host: null output or invalid size");  // size may not exceed the array
+  RAFT_CUDA_TRY(
+    cudaMemcpy(output, variable_types_.data(), size * sizeof(var_t), cudaMemcpyDeviceToHost));
+}
+
+// ==============================================================================
+// Template instantiations
+// ==============================================================================
+// Explicit instantiations: each precision is emitted only if its MIP_INSTANTIATE_* macro is non-zero
+#if MIP_INSTANTIATE_FLOAT
+template class optimization_problem_t<int32_t, float>;
+#endif
+#if MIP_INSTANTIATE_DOUBLE
+template class optimization_problem_t<int32_t, double>;
+#endif
 
 }  // namespace cuopt::linear_programming
diff --git a/cpp/src/pdlp/solution_conversion.cu b/cpp/src/pdlp/solution_conversion.cu
index ea3c681266..7993445a08 100644
--- a/cpp/src/pdlp/solution_conversion.cu
+++ b/cpp/src/pdlp/solution_conversion.cu
@@ -11,7 +11,7 @@
  */
 
 #include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
-#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/utilities/cython_solve.hpp>
 
 #include <rmm/device_buffer.hpp>
diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu
index 9d4ab483fb..5e1e25bbee 100644
--- a/cpp/src/pdlp/solve.cu
+++ b/cpp/src/pdlp/solve.cu
@@ -6,6 +6,7 @@
 /* clang-format on */
 
 #include <cuopt/error.hpp>
+#include <cuopt/linear_programming/solve_remote.hpp>
 #include <pdlp/cusparse_view.hpp>
 #include <pdlp/optimal_batch_size_handler/optimal_batch_size_handler.hpp>
 #include <pdlp/pdlp.cuh>
@@ -25,9 +26,10 @@
 #include <mip_heuristics/utilities/sort_csr.cuh>
 
 #include <cuopt/linear_programming/backend_selection.hpp>
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
-#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh>
 #include <cuopt/linear_programming/pdlp/solver_settings.hpp>
 #include <cuopt/linear_programming/solve.hpp>
@@ -1201,6 +1203,10 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(
       CUOPT_LOG_INFO("Third-party presolve is disabled, skipping");
     }
 
+    // Declare result at outer scope so that result->reduced_problem (which may be
+    // referenced by problem.original_problem_ptr) remains alive through the solve.
+    std::optional<detail::third_party_presolve_result_t<i_t, f_t>> result;
+
     if (run_presolve) {
       detail::sort_csr(op_problem);
       // allocate no more than 10% of the time limit to presolve.
@@ -1208,14 +1214,14 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(
       // But no less than 1 second, to avoid early timeout triggering known crashes
       const double presolve_time_limit =
         std::max(1.0, std::min(0.1 * lp_timer.remaining_time(), 60.0));
-      presolver   = std::make_unique<detail::third_party_presolve_t<i_t, f_t>>();
-      auto result = presolver->apply(op_problem,
-                                     cuopt::linear_programming::problem_category_t::LP,
-                                     settings.presolver,
-                                     settings.dual_postsolve,
-                                     settings.tolerances.absolute_primal_tolerance,
-                                     settings.tolerances.relative_primal_tolerance,
-                                     presolve_time_limit);
+      presolver = std::make_unique<detail::third_party_presolve_t<i_t, f_t>>();
+      result    = presolver->apply(op_problem,
+                                cuopt::linear_programming::problem_category_t::LP,
+                                settings.presolver,
+                                settings.dual_postsolve,
+                                settings.tolerances.absolute_primal_tolerance,
+                                settings.tolerances.relative_primal_tolerance,
+                                presolve_time_limit);
       if (!result.has_value()) {
         return optimization_problem_solution_t<i_t, f_t>(
           pdlp_termination_status_t::PrimalInfeasible, op_problem.get_handle_ptr()->get_stream());
@@ -1445,69 +1451,87 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(
 }
 
 // ============================================================================
-// Interface-based solve overloads with remote execution support
+// CPU problem overloads (convert to GPU, solve, convert solution back)
 // ============================================================================
 
 template <typename i_t, typename f_t>
 std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp(
-  optimization_problem_interface_t<i_t, f_t>* problem_interface,
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
   pdlp_solver_settings_t<i_t, f_t> const& settings,
   bool problem_checking,
   bool use_pdlp_solver_mode,
   bool is_batch_mode)
 {
-  // Check if remote execution is enabled
-  if (is_remote_execution_enabled()) {
-    cuopt_expects(!is_batch_mode,
-                  error_type_t::ValidationError,
-                  "Batch mode with remote execution is not supported via this entry point. "
-                  "Use solve_batch_remote() instead.");
-    CUOPT_LOG_INFO("Remote LP solve requested");
-    return problem_interface->solve_lp_remote(settings, problem_checking, use_pdlp_solver_mode);
-  } else {
-    // Local execution - convert to optimization_problem_t and call original solve_lp
-    CUOPT_LOG_INFO("Local LP solve");
-
-    // Check if this is a CPU problem (test mode: CUOPT_USE_CPU_MEM_FOR_LOCAL=true)
-    auto* cpu_prob = dynamic_cast<cpu_optimization_problem_t<i_t, f_t>*>(problem_interface);
-    if (cpu_prob != nullptr) {
-      CUOPT_LOG_INFO("Test mode: Converting CPU problem to GPU for local solve");
+  CUOPT_LOG_INFO("solve_lp (CPU problem) - converting to GPU for local solve");
 
-      // Create CUDA resources for the conversion
-      rmm::cuda_stream stream;
-      raft::handle_t handle(stream);
+  // Create CUDA resources for the conversion
+  rmm::cuda_stream stream;
+  raft::handle_t handle(stream);
 
-      // Temporarily set the handle on the CPU problem so it can create GPU resources
-      cpu_prob->set_handle(&handle);
+  // Convert CPU problem to GPU problem
+  auto gpu_problem = cpu_problem.to_optimization_problem(&handle);
 
-      // Convert CPU problem to GPU problem
-      auto op_problem = cpu_prob->to_optimization_problem();
+  // Synchronize before solving to ensure conversion is complete
+  stream.synchronize();
 
-      // Clear the handle to avoid dangling pointer after this scope
-      cpu_prob->set_handle(nullptr);
+  // Solve on GPU
+  auto gpu_solution = solve_lp<i_t, f_t>(
+    *gpu_problem, settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
 
-      // Synchronize before solving to ensure conversion is complete
-      stream.synchronize();
+  // Ensure all GPU work from the solve is complete before D2H copies in to_cpu_solution(),
+  // which uses rmm::cuda_stream_per_thread (a different stream than the solver used).
+  stream.synchronize();
 
-      // Solve on GPU
-      auto gpu_solution = solve_lp<i_t, f_t>(
-        op_problem, settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
+  // Convert GPU solution back to CPU
+  gpu_lp_solution_t<i_t, f_t> gpu_sol_interface(std::move(gpu_solution));
+  return gpu_sol_interface.to_cpu_solution();
+}
 
-      // Ensure all GPU work from the solve is complete before to_cpu_solution() D2H copies.
-      stream.synchronize();
+// ============================================================================
+// Interface-based solve overloads with remote execution support
+// ============================================================================
 
-      CUOPT_LOG_INFO("Test mode: Converting GPU solution back to CPU solution");
-      gpu_lp_solution_t<i_t, f_t> gpu_sol_interface(std::move(gpu_solution));
-      return gpu_sol_interface.to_cpu_solution();
-    }
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp(
+  optimization_problem_interface_t<i_t, f_t>* problem_interface,
+  pdlp_solver_settings_t<i_t, f_t> const& settings,
+  bool problem_checking,
+  bool use_pdlp_solver_mode,
+  bool is_batch_mode)
+{
+  cuopt_expects(problem_interface != nullptr,
+                error_type_t::ValidationError,
+                "problem_interface cannot be null");
 
-    auto op_problem   = problem_interface->to_optimization_problem();
-    auto gpu_solution = solve_lp<i_t, f_t>(
-      op_problem, settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
+  // Remote execution path: only the CPU memory backend is supported (enforced below)
+  if (is_remote_execution_enabled()) {
+    cuopt_expects(!is_batch_mode,
+                  error_type_t::ValidationError,
+                  "Batch mode with remote execution is not supported via this entry point. "
+                  "Use solve_batch_remote() instead.");
+    auto* cpu_prob = dynamic_cast<cpu_optimization_problem_t<i_t, f_t>*>(problem_interface);
+    cuopt_expects(cpu_prob != nullptr,
+                  error_type_t::ValidationError,
+                  "Remote execution requires CPU memory backend");
+    CUOPT_LOG_INFO("Remote LP solve requested");
+    return solve_lp_remote(*cpu_prob, settings, problem_checking, use_pdlp_solver_mode);
+  }
 
-    // Wrap GPU solution in interface and return
-    return std::make_unique<gpu_lp_solution_t<i_t, f_t>>(std::move(gpu_solution));
+  // Local execution - dispatch to appropriate overload based on problem type
+  auto* cpu_prob = dynamic_cast<cpu_optimization_problem_t<i_t, f_t>*>(problem_interface);
+  if (cpu_prob != nullptr) {
+    // CPU problem: use CPU overload (converts to GPU, solves, converts solution back)
+    return solve_lp(*cpu_prob, settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
   }
+
+  // GPU problem: call GPU solver directly
+  auto* gpu_prob = dynamic_cast<optimization_problem_t<i_t, f_t>*>(problem_interface);
+  cuopt_expects(gpu_prob != nullptr,
+                error_type_t::ValidationError,
+                "problem_interface must be either a CPU or GPU optimization problem");
+  auto gpu_solution =
+    solve_lp<i_t, f_t>(*gpu_prob, settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
+  return std::make_unique<gpu_lp_solution_t<i_t, f_t>>(std::move(gpu_solution));
 }
 
 #define INSTANTIATE(F_TYPE)                                                            \
@@ -1525,6 +1549,13 @@ std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp(
     bool problem_checking,                                                             \
     bool use_pdlp_solver_mode);                                                        \
                                                                                        \
+  template std::unique_ptr<lp_solution_interface_t<int, F_TYPE>> solve_lp(             \
+    cpu_optimization_problem_t<int, F_TYPE>&,                                          \
+    pdlp_solver_settings_t<int, F_TYPE> const&,                                        \
+    bool,                                                                              \
+    bool,                                                                              \
+    bool);                                                                             \
+                                                                                       \
   template std::unique_ptr<lp_solution_interface_t<int, F_TYPE>> solve_lp(             \
     optimization_problem_interface_t<int, F_TYPE>*,                                    \
     pdlp_solver_settings_t<int, F_TYPE> const&,                                        \
diff --git a/cpp/src/pdlp/solve_remote.cu b/cpp/src/pdlp/solve_remote.cu
index 9230c9a211..a9bf7e3989 100644
--- a/cpp/src/pdlp/solve_remote.cu
+++ b/cpp/src/pdlp/solve_remote.cu
@@ -5,10 +5,9 @@
  */
 /* clang-format on */
 
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp>
-#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <utilities/logger.hpp>
 
@@ -108,136 +107,6 @@ std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
   return mip_solution;
 }
 
-// ============================================================================
-// Helper: Convert GPU problem to CPU problem (copies all device data to host)
-// ============================================================================
-
-template <typename i_t, typename f_t>
-cpu_optimization_problem_t<i_t, f_t> gpu_problem_to_cpu(
-  gpu_optimization_problem_t<i_t, f_t> const& gpu_problem)
-{
-  auto cpu_problem = cpu_optimization_problem_t<i_t, f_t>(nullptr);  // No CUDA resources for remote
-
-  // Copy scalar properties
-  cpu_problem.set_maximize(gpu_problem.get_sense());
-  cpu_problem.set_objective_offset(gpu_problem.get_objective_offset());
-  cpu_problem.set_objective_scaling_factor(gpu_problem.get_objective_scaling_factor());
-  cpu_problem.set_problem_category(gpu_problem.get_problem_category());
-
-  // Copy names
-  cpu_problem.set_problem_name(gpu_problem.get_problem_name());
-  cpu_problem.set_objective_name(gpu_problem.get_objective_name());
-  cpu_problem.set_variable_names(gpu_problem.get_variable_names());
-  cpu_problem.set_row_names(gpu_problem.get_row_names());
-
-  // Copy objective coefficients
-  auto obj_coeffs = gpu_problem.get_objective_coefficients_host();
-  if (!obj_coeffs.empty()) {
-    cpu_problem.set_objective_coefficients(obj_coeffs.data(), obj_coeffs.size());
-  }
-
-  // Copy constraint matrix (CSR format)
-  auto matrix_values  = gpu_problem.get_constraint_matrix_values_host();
-  auto matrix_indices = gpu_problem.get_constraint_matrix_indices_host();
-  auto matrix_offsets = gpu_problem.get_constraint_matrix_offsets_host();
-  if (!matrix_values.empty()) {
-    cpu_problem.set_csr_constraint_matrix(matrix_values.data(),
-                                          matrix_values.size(),
-                                          matrix_indices.data(),
-                                          matrix_indices.size(),
-                                          matrix_offsets.data(),
-                                          matrix_offsets.size());
-  }
-
-  // Copy constraint bounds (single RHS b-vector)
-  auto constraint_bounds = gpu_problem.get_constraint_bounds_host();
-  if (!constraint_bounds.empty()) {
-    cpu_problem.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size());
-  }
-
-  // Copy constraint lower/upper bounds
-  auto constraint_lb = gpu_problem.get_constraint_lower_bounds_host();
-  auto constraint_ub = gpu_problem.get_constraint_upper_bounds_host();
-  if (!constraint_lb.empty()) {
-    cpu_problem.set_constraint_lower_bounds(constraint_lb.data(), constraint_lb.size());
-  }
-  if (!constraint_ub.empty()) {
-    cpu_problem.set_constraint_upper_bounds(constraint_ub.data(), constraint_ub.size());
-  }
-
-  // Copy row types (constraint sense: E/L/G)
-  auto row_types = gpu_problem.get_row_types_host();
-  if (!row_types.empty()) { cpu_problem.set_row_types(row_types.data(), row_types.size()); }
-
-  // Copy variable bounds
-  auto var_lb = gpu_problem.get_variable_lower_bounds_host();
-  auto var_ub = gpu_problem.get_variable_upper_bounds_host();
-  if (!var_lb.empty()) { cpu_problem.set_variable_lower_bounds(var_lb.data(), var_lb.size()); }
-  if (!var_ub.empty()) { cpu_problem.set_variable_upper_bounds(var_ub.data(), var_ub.size()); }
-
-  // Copy variable types
-  auto var_types = gpu_problem.get_variable_types_host();
-  if (!var_types.empty()) { cpu_problem.set_variable_types(var_types.data(), var_types.size()); }
-
-  // Copy quadratic objective if present
-  if (gpu_problem.has_quadratic_objective()) {
-    auto quad_offsets = gpu_problem.get_quadratic_objective_offsets();
-    auto quad_indices = gpu_problem.get_quadratic_objective_indices();
-    auto quad_values  = gpu_problem.get_quadratic_objective_values();
-    cpu_problem.set_quadratic_objective_matrix(quad_values.data(),
-                                               quad_values.size(),
-                                               quad_indices.data(),
-                                               quad_indices.size(),
-                                               quad_offsets.data(),
-                                               quad_offsets.size());
-  }
-
-  return cpu_problem;
-}
-
-// ============================================================================
-// Remote execution for GPU problems (converts to CPU then calls CPU remote)
-// ============================================================================
-
-template <typename i_t, typename f_t>
-std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
-  gpu_optimization_problem_t<i_t, f_t> const& gpu_problem,
-  pdlp_solver_settings_t<i_t, f_t> const& settings,
-  bool problem_checking,
-  bool use_pdlp_solver_mode)
-{
-  init_logger_t log(settings.log_file, settings.log_to_console);
-  CUOPT_LOG_INFO("solve_lp_remote (GPU problem) - converting to CPU for remote execution");
-
-  auto cpu_problem = gpu_problem_to_cpu(gpu_problem);
-
-  // Call CPU remote solver (returns unique_ptr<lp_solution_interface_t>)
-  auto cpu_solution_interface =
-    solve_lp_remote(cpu_problem, settings, problem_checking, use_pdlp_solver_mode);
-
-  // Convert CPU solution back to GPU solution (since we started with a GPU problem)
-  auto gpu_solution = cpu_solution_interface->to_gpu_solution(rmm::cuda_stream_per_thread);
-  return std::make_unique<gpu_lp_solution_t<i_t, f_t>>(std::move(gpu_solution));
-}
-
-template <typename i_t, typename f_t>
-std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
-  gpu_optimization_problem_t<i_t, f_t> const& gpu_problem,
-  mip_solver_settings_t<i_t, f_t> const& settings)
-{
-  init_logger_t log(settings.log_file, settings.log_to_console);
-  CUOPT_LOG_INFO("solve_mip_remote (GPU problem) - converting to CPU for remote execution");
-
-  auto cpu_problem = gpu_problem_to_cpu(gpu_problem);
-
-  // Call CPU remote solver (returns unique_ptr<mip_solution_interface_t>)
-  auto cpu_solution_interface = solve_mip_remote(cpu_problem, settings);
-
-  // Convert CPU solution back to GPU solution (since we started with a GPU problem)
-  auto gpu_solution = cpu_solution_interface->to_gpu_solution(rmm::cuda_stream_per_thread);
-  return std::make_unique<gpu_mip_solution_t<i_t, f_t>>(std::move(gpu_solution));
-}
-
 // Explicit template instantiations for remote execution stubs
 template std::unique_ptr<lp_solution_interface_t<int, double>> solve_lp_remote(
   cpu_optimization_problem_t<int, double> const&,
@@ -248,13 +117,4 @@ template std::unique_ptr<lp_solution_interface_t<int, double>> solve_lp_remote(
 template std::unique_ptr<mip_solution_interface_t<int, double>> solve_mip_remote(
   cpu_optimization_problem_t<int, double> const&, mip_solver_settings_t<int, double> const&);
 
-template std::unique_ptr<lp_solution_interface_t<int, double>> solve_lp_remote(
-  gpu_optimization_problem_t<int, double> const&,
-  pdlp_solver_settings_t<int, double> const&,
-  bool,
-  bool);
-
-template std::unique_ptr<mip_solution_interface_t<int, double>> solve_mip_remote(
-  gpu_optimization_problem_t<int, double> const&, mip_solver_settings_t<int, double> const&);
-
 }  // namespace cuopt::linear_programming
diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp
index aebe87b140..b8e0075733 100644
--- a/cpp/src/pdlp/translate.hpp
+++ b/cpp/src/pdlp/translate.hpp
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 
 #include <dual_simplex/presolve.hpp>
 #include <dual_simplex/sparse_matrix.hpp>
diff --git a/cpp/src/pdlp/utilities/cython_solve.cu b/cpp/src/pdlp/utilities/cython_solve.cu
index 200ffe49e2..28e750d150 100644
--- a/cpp/src/pdlp/utilities/cython_solve.cu
+++ b/cpp/src/pdlp/utilities/cython_solve.cu
@@ -7,10 +7,10 @@
 
 #include <cuopt/error.hpp>
 #include <cuopt/linear_programming/backend_selection.hpp>
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
-#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem_utils.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <cuopt/linear_programming/solver_settings.hpp>
@@ -108,16 +108,16 @@ std::unique_ptr<solver_ret_t> call_solve(
     rmm::cuda_stream stream(static_cast<rmm::cuda_stream::flags>(flags));
     const raft::handle_t handle_{stream};
 
-    auto gpu_problem = cuopt::linear_programming::gpu_optimization_problem_t<int, double>(&handle_);
+    auto problem = cuopt::linear_programming::optimization_problem_t<int, double>(&handle_);
     cuopt::linear_programming::populate_from_data_model_view(
-      &gpu_problem, data_model, solver_settings, &handle_);
+      &problem, data_model, solver_settings, &handle_);
 
     // Call appropriate solve function and convert to ret struct
-    if (gpu_problem.get_problem_category() == linear_programming::problem_category_t::LP) {
+    if (problem.get_problem_category() == linear_programming::problem_category_t::LP) {
       // Solve and get solution interface pointer
       auto lp_solution_ptr =
         std::unique_ptr<linear_programming::lp_solution_interface_t<int, double>>(
-          call_solve_lp(&gpu_problem, solver_settings->get_pdlp_settings(), is_batch_mode));
+          call_solve_lp(&problem, solver_settings->get_pdlp_settings(), is_batch_mode));
 
       response.lp_ret       = lp_solution_ptr->to_python_lp_ret();
       response.problem_type = linear_programming::problem_category_t::LP;
@@ -143,7 +143,7 @@ std::unique_ptr<solver_ret_t> call_solve(
       // MIP solve
       auto mip_solution_ptr =
         std::unique_ptr<linear_programming::mip_solution_interface_t<int, double>>(
-          call_solve_mip(&gpu_problem, solver_settings->get_mip_settings()));
+          call_solve_mip(&problem, solver_settings->get_mip_settings()));
 
       response.mip_ret      = mip_solution_ptr->to_python_mip_ret();
       response.problem_type = linear_programming::problem_category_t::MIP;
@@ -173,8 +173,8 @@ std::unique_ptr<solver_ret_t> call_solve(
     }
 
   } else {
-    // CPU memory backend: No CUDA resources needed at problem-creation time
-    auto cpu_problem = cuopt::linear_programming::cpu_optimization_problem_t<int, double>(nullptr);
+    // CPU memory backend: pure data container, no CUDA resources needed
+    auto cpu_problem = cuopt::linear_programming::cpu_optimization_problem_t<int, double>();
     cuopt::linear_programming::populate_from_data_model_view(
       &cpu_problem, data_model, solver_settings, nullptr);
 
diff --git a/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu b/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu
index ffbe356a3b..ddee8a12c5 100644
--- a/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu
+++ b/cpp/tests/linear_programming/unit_tests/optimization_problem_test.cu
@@ -277,8 +277,8 @@ TEST(optimization_problem_t, test_check_problem_validity)
   // Test that n_vars is now set
   EXPECT_EQ(op_problem_.get_n_variables(), 1);
 
-  // Test that n_constraints is not set
-  EXPECT_EQ(op_problem_.get_n_constraints(), 0);
+  // n_constraints is derived from the CSR offsets (offsets size - 1), so it is already 1 here
+  EXPECT_EQ(op_problem_.get_n_constraints(), 1);
 
   // Set row type
   char row_type_host[] = {'E'};
@@ -288,7 +288,6 @@ TEST(optimization_problem_t, test_check_problem_validity)
   EXPECT_THROW((problem_checking_t<int, double>::check_problem_representation(op_problem_)),
                cuopt::logic_error);
 
-  // Test that n_constraints is now set
   EXPECT_EQ(op_problem_.get_n_constraints(), 1);
 
   // Set b
diff --git a/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu b/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu
index 1d2989da0c..0146204c87 100644
--- a/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu
+++ b/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu
@@ -15,10 +15,11 @@
  * the MPS-parser-to-problem pipeline and legitimately needs a real file.
  */
 
+#include <cuopt/linear_programming/cpu_optimization_problem.hpp>
 #include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp>
-#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
-#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem_utils.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <mps_parser/parser.hpp>
@@ -297,24 +298,25 @@ TEST_F(SolutionInterfaceTest, termination_status_int_values)
 TEST_F(SolutionInterfaceTest, gpu_problem_to_optimization_problem)
 {
   raft::handle_t handle;
-  auto problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&handle);
+  auto problem = std::make_unique<optimization_problem_t<int, double>>(&handle);
   populate_tiny_problem(problem.get());
 
   EXPECT_EQ(problem->get_n_variables(), kNVars);
   EXPECT_EQ(problem->get_n_constraints(), kNCons);
 
-  auto concrete = problem->to_optimization_problem();
-  EXPECT_EQ(concrete.get_n_variables(), kNVars);
-  EXPECT_EQ(concrete.get_n_constraints(), kNCons);
+  // GPU problem's to_optimization_problem() returns nullptr (already a GPU problem)
+  auto concrete = problem->to_optimization_problem(&handle);
+  EXPECT_EQ(concrete, nullptr);
 
-  auto obj = cuopt::host_copy(concrete.get_objective_coefficients(), handle.get_stream());
+  // Verify the data is still accessible directly on the problem
+  auto obj = cuopt::host_copy(problem->get_objective_coefficients(), handle.get_stream());
   ASSERT_EQ(static_cast<int>(obj.size()), kNVars);
   for (int i = 0; i < kNVars; ++i) {
     EXPECT_NEAR(obj[i], kObj[i], 1e-9);
   }
 
-  auto lb = cuopt::host_copy(concrete.get_variable_lower_bounds(), handle.get_stream());
-  auto ub = cuopt::host_copy(concrete.get_variable_upper_bounds(), handle.get_stream());
+  auto lb = cuopt::host_copy(problem->get_variable_lower_bounds(), handle.get_stream());
+  auto ub = cuopt::host_copy(problem->get_variable_upper_bounds(), handle.get_stream());
   ASSERT_EQ(static_cast<int>(lb.size()), kNVars);
   ASSERT_EQ(static_cast<int>(ub.size()), kNVars);
   for (int i = 0; i < kNVars; ++i) {
@@ -322,7 +324,7 @@ TEST_F(SolutionInterfaceTest, gpu_problem_to_optimization_problem)
     EXPECT_NEAR(ub[i], kVarUb[i], 1e-9);
   }
 
-  auto vals = cuopt::host_copy(concrete.get_constraint_matrix_values(), handle.get_stream());
+  auto vals = cuopt::host_copy(problem->get_constraint_matrix_values(), handle.get_stream());
   ASSERT_EQ(static_cast<int>(vals.size()), kNnz);
   for (int i = 0; i < kNnz; ++i) {
     EXPECT_NEAR(vals[i], kCsrVal[i], 1e-9);
@@ -332,30 +334,31 @@ TEST_F(SolutionInterfaceTest, gpu_problem_to_optimization_problem)
 TEST_F(SolutionInterfaceTest, cpu_problem_to_optimization_problem)
 {
   raft::handle_t handle;
-  auto problem = std::make_unique<cpu_optimization_problem_t<int, double>>(&handle);
+  auto problem = std::make_unique<cpu_optimization_problem_t<int, double>>();
   populate_tiny_problem(problem.get());
 
   EXPECT_EQ(problem->get_n_variables(), kNVars);
   EXPECT_EQ(problem->get_n_constraints(), kNCons);
 
-  auto concrete = problem->to_optimization_problem();
-  EXPECT_EQ(concrete.get_n_variables(), kNVars);
-  EXPECT_EQ(concrete.get_n_constraints(), kNCons);
+  auto concrete = problem->to_optimization_problem(&handle);
+  ASSERT_NE(concrete, nullptr);
+  EXPECT_EQ(concrete->get_n_variables(), kNVars);
+  EXPECT_EQ(concrete->get_n_constraints(), kNCons);
 
-  auto obj = cuopt::host_copy(concrete.get_objective_coefficients(), handle.get_stream());
+  auto obj = cuopt::host_copy(concrete->get_objective_coefficients(), handle.get_stream());
   ASSERT_EQ(static_cast<int>(obj.size()), kNVars);
   for (int i = 0; i < kNVars; ++i) {
     EXPECT_NEAR(obj[i], kObj[i], 1e-9);
   }
 
-  auto lb = cuopt::host_copy(concrete.get_variable_lower_bounds(), handle.get_stream());
-  auto ub = cuopt::host_copy(concrete.get_variable_upper_bounds(), handle.get_stream());
+  auto lb = cuopt::host_copy(concrete->get_variable_lower_bounds(), handle.get_stream());
+  auto ub = cuopt::host_copy(concrete->get_variable_upper_bounds(), handle.get_stream());
   for (int i = 0; i < kNVars; ++i) {
     EXPECT_NEAR(lb[i], kVarLb[i], 1e-9);
     EXPECT_NEAR(ub[i], kVarUb[i], 1e-9);
   }
 
-  auto vals = cuopt::host_copy(concrete.get_constraint_matrix_values(), handle.get_stream());
+  auto vals = cuopt::host_copy(concrete->get_constraint_matrix_values(), handle.get_stream());
   ASSERT_EQ(static_cast<int>(vals.size()), kNnz);
   for (int i = 0; i < kNnz; ++i) {
     EXPECT_NEAR(vals[i], kCsrVal[i], 1e-9);
@@ -401,52 +404,7 @@ TEST_F(SolutionInterfaceTest, mps_data_model_to_optimization_problem)
 // Solution conversion tests (hand-constructed, known values)
 // =============================================================================
 
-TEST_F(SolutionInterfaceTest, cpu_lp_solution_to_gpu)
-{
-  auto cpu_sol = make_cpu_lp_solution(/*with_warmstart=*/false);
-
-  auto orig_primal = cpu_sol->get_primal_solution_host();
-  ASSERT_EQ(orig_primal.size(), static_cast<size_t>(kNVars));
-
-  auto gpu_sol = cpu_sol->to_gpu_solution(rmm::cuda_stream_per_thread);
-
-  EXPECT_NEAR(gpu_sol.get_objective_value(0), -42.0, 1e-9);
-  EXPECT_EQ(static_cast<int>(gpu_sol.get_termination_status()),
-            static_cast<int>(pdlp_termination_status_t::Optimal));
-
-  auto gpu_primal = cuopt::host_copy(gpu_sol.get_primal_solution(), rmm::cuda_stream_per_thread);
-  ASSERT_EQ(gpu_primal.size(), orig_primal.size());
-  for (size_t i = 0; i < gpu_primal.size(); ++i) {
-    EXPECT_NEAR(gpu_primal[i], orig_primal[i], 1e-9);
-  }
-
-  // Verify all termination fields survive the round-trip
-  auto info = gpu_sol.get_additional_termination_information(0);
-  EXPECT_NEAR(info.l2_primal_residual, 1e-8, 1e-12);
-  EXPECT_NEAR(info.l2_dual_residual, 2e-8, 1e-12);
-  EXPECT_NEAR(info.gap, 0.5, 1e-9);
-  EXPECT_EQ(info.number_of_steps_taken, 100);
-  EXPECT_TRUE(info.solved_by_pdlp);
-}
-
-TEST_F(SolutionInterfaceTest, cpu_mip_solution_to_gpu)
-{
-  auto cpu_sol = make_cpu_mip_solution();
-
-  auto gpu_sol = cpu_sol->to_gpu_solution(rmm::cuda_stream_per_thread);
-
-  EXPECT_NEAR(gpu_sol.get_objective_value(), -99.0, 1e-9);
-  EXPECT_EQ(static_cast<int>(gpu_sol.get_termination_status()),
-            static_cast<int>(mip_termination_status_t::Optimal));
-
-  auto host_sol = cuopt::host_copy(gpu_sol.get_solution(), rmm::cuda_stream_per_thread);
-  ASSERT_EQ(host_sol.size(), static_cast<size_t>(kNVars));
-  EXPECT_NEAR(host_sol[0], 1.0, 1e-9);
-  EXPECT_NEAR(host_sol[1], 0.0, 1e-9);
-  EXPECT_NEAR(host_sol[2], 1.0, 1e-9);
-}
-
-TEST_F(SolutionInterfaceTest, gpu_lp_solution_to_python_ret)
+TEST_F(SolutionInterfaceTest, lp_solution_to_python_ret)
 {
   auto sol        = make_gpu_lp_solution();
   auto python_ret = sol.to_python_lp_ret();
@@ -464,7 +422,7 @@ TEST_F(SolutionInterfaceTest, cpu_lp_solution_to_python_ret)
   EXPECT_NEAR(python_ret.primal_objective_, -42.0, 1e-9);
 }
 
-TEST_F(SolutionInterfaceTest, gpu_mip_solution_to_python_ret)
+TEST_F(SolutionInterfaceTest, mip_solution_to_python_ret)
 {
   auto sol        = make_gpu_mip_solution();
   auto python_ret = sol.to_python_mip_ret();
@@ -482,38 +440,6 @@ TEST_F(SolutionInterfaceTest, cpu_mip_solution_to_python_ret)
   EXPECT_NEAR(python_ret.objective_, -99.0, 1e-9);
 }
 
-// =============================================================================
-// Warmstart conversion tests (hand-constructed)
-// =============================================================================
-
-TEST_F(SolutionInterfaceTest, cpu_warmstart_to_gpu)
-{
-  auto cpu_sol = make_cpu_lp_solution(/*with_warmstart=*/true);
-
-  ASSERT_TRUE(cpu_sol->has_warm_start_data());
-
-  auto orig_primal = cpu_sol->get_current_primal_solution_host();
-  auto orig_dual   = cpu_sol->get_current_dual_solution_host();
-  ASSERT_EQ(orig_primal.size(), static_cast<size_t>(kNVars));
-  ASSERT_EQ(orig_dual.size(), static_cast<size_t>(kNCons));
-
-  auto gpu_sol = cpu_sol->to_gpu_solution(rmm::cuda_stream_per_thread);
-
-  auto& ws_data  = gpu_sol.get_pdlp_warm_start_data();
-  auto ws_primal = cuopt::host_copy(ws_data.current_primal_solution_, rmm::cuda_stream_per_thread);
-  auto ws_dual   = cuopt::host_copy(ws_data.current_dual_solution_, rmm::cuda_stream_per_thread);
-
-  ASSERT_EQ(ws_primal.size(), orig_primal.size());
-  ASSERT_EQ(ws_dual.size(), orig_dual.size());
-
-  for (size_t i = 0; i < ws_primal.size(); ++i) {
-    EXPECT_NEAR(ws_primal[i], orig_primal[i], 1e-9);
-  }
-  for (size_t i = 0; i < ws_dual.size(); ++i) {
-    EXPECT_NEAR(ws_dual[i], orig_dual[i], 1e-9);
-  }
-}
-
 // =============================================================================
 // Problem interface copy_to_host tests (hand-constructed)
 // =============================================================================
@@ -521,7 +447,7 @@ TEST_F(SolutionInterfaceTest, cpu_warmstart_to_gpu)
 TEST_F(SolutionInterfaceTest, gpu_problem_copy_to_host_methods)
 {
   raft::handle_t handle;
-  auto problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&handle);
+  auto problem = std::make_unique<optimization_problem_t<int, double>>(&handle);
   populate_tiny_problem(problem.get());
 
   std::vector<double> obj(kNVars);