diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h
index 4ee0224845..7caf7aeeb2 100644
--- a/cpp/include/cuopt/linear_programming/constants.h
+++ b/cpp/include/cuopt/linear_programming/constants.h
@@ -52,12 +52,20 @@
 #define CUOPT_MIP_ABSOLUTE_TOLERANCE          "mip_absolute_tolerance"
 #define CUOPT_MIP_RELATIVE_TOLERANCE          "mip_relative_tolerance"
 #define CUOPT_MIP_INTEGRALITY_TOLERANCE       "mip_integrality_tolerance"
-#define CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING "mip_batch_pdlp_strong_branching"
 #define CUOPT_MIP_ABSOLUTE_GAP                "mip_absolute_gap"
 #define CUOPT_MIP_RELATIVE_GAP                "mip_relative_gap"
 #define CUOPT_MIP_HEURISTICS_ONLY             "mip_heuristics_only"
 #define CUOPT_MIP_SCALING                     "mip_scaling"
 #define CUOPT_MIP_PRESOLVE                    "mip_presolve"
+#define CUOPT_MIP_CUT_PASSES                  "mip_cut_passes"
+#define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts"
+#define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS   "mip_mixed_integer_gomory_cuts"
+#define CUOPT_MIP_KNAPSACK_CUTS               "mip_knapsack_cuts"
+#define CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS  "mip_strong_chvatal_gomory_cuts"
+#define CUOPT_MIP_REDUCED_COST_STRENGTHENING  "mip_reduced_cost_strengthening"
+#define CUOPT_MIP_CUT_CHANGE_THRESHOLD        "mip_cut_change_threshold"
+#define CUOPT_MIP_CUT_MIN_ORTHOGONALITY       "mip_cut_min_orthogonality"
+#define CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING "mip_batch_pdlp_strong_branching"
 #define CUOPT_SOLUTION_FILE                   "solution_file"
 #define CUOPT_NUM_CPU_THREADS                 "num_cpu_threads"
 #define CUOPT_NUM_GPUS                        "num_gpus"
diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
index 326d7f76ad..863e5d66d6 100644
--- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
+++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
@@ -83,12 +83,22 @@ class mip_solver_settings_t {
   friend class problem_checking_t;
   tolerances_t tolerances;
 
-  f_t time_limit       = std::numeric_limits<f_t>::infinity();
-  bool heuristics_only = false;
-  i_t num_cpu_threads  = -1;  // -1 means use default number of threads in branch and bound
-  i_t num_gpus         = 1;
+  f_t time_limit                = std::numeric_limits<f_t>::infinity();
+  i_t node_limit                = std::numeric_limits<i_t>::max();
+  bool heuristics_only          = false;
+  i_t num_cpu_threads           = -1;  // -1 means use default number of threads in branch and bound
+  i_t max_cut_passes            = 10;  // number of cut passes to make
+  i_t mir_cuts                  = -1;
+  i_t mixed_integer_gomory_cuts = -1;
+  i_t knapsack_cuts             = -1;
+  i_t strong_chvatal_gomory_cuts      = -1;
+  i_t reduced_cost_strengthening      = -1;
+  f_t cut_change_threshold            = 1e-3;
+  f_t cut_min_orthogonality           = 0.5;
   i_t mip_batch_pdlp_strong_branching = 0;
+  i_t num_gpus                        = 1;
   bool log_to_console                 = true;
+
   std::string log_file;
   std::string sol_file;
   std::string user_problem_file;
diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt
index af1415fa9c..5405735043 100644
--- a/cpp/src/dual_simplex/CMakeLists.txt
+++ b/cpp/src/dual_simplex/CMakeLists.txt
@@ -10,6 +10,7 @@ set(DUAL_SIMPLEX_SRC_FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/basis_updates.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/bound_flipping_ratio_test.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/cuts.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/crossover.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/folding.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/initial_basis.cpp
diff --git a/cpp/src/dual_simplex/barrier.cu b/cpp/src/dual_simplex/barrier.cu
index a883232959..5eef97bb8a 100644
--- a/cpp/src/dual_simplex/barrier.cu
+++ b/cpp/src/dual_simplex/barrier.cu
@@ -681,7 +681,7 @@ class iteration_data_t {
           solve_status = chol->solve(U_col, M_col);
           if (solve_status != 0) { return solve_status; }
           if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
-            return -2;
+            return CONCURRENT_HALT_RETURN;
           }
           M.set_column(k, M_col);
 
@@ -700,7 +700,7 @@ class iteration_data_t {
           AD_dense.transpose_multiply(
             1.0, M.values.data() + k * M.m, 0.0, H.values.data() + k * H.m);
           if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
-            return -2;
+            return CONCURRENT_HALT_RETURN;
           }
         }
 
@@ -1745,7 +1745,7 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
   } else {
     status = data.chol->factorize(data.device_ADAT);
   }
-  if (status == -2) { return -2; }
+  if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; }
   if (status != 0) {
     settings.log.printf("Initial factorization failed\n");
     return -1;
@@ -2309,7 +2309,7 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
     data.num_factorizations++;
 
     data.has_solve_info = false;
-    if (status == -2) { return -2; }
+    if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; }
 
     if (status < 0) {
       settings.log.printf("Factorization failed.\n");
@@ -2411,7 +2411,7 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
       // TODO Chris, we need to write to cpu because dx is used outside
       // Can't we also GPUify what's usinng this dx?
       raft::copy(dy.data(), data.d_dy_.data(), dy.size(), stream_view_);
-      if (solve_status == -2) { return -2; }
+      if (solve_status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; }
       if (solve_status < 0) {
         settings.log.printf("Linear solve failed\n");
         return -1;
diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp
index f5cd54053b..2ee5e1d01f 100644
--- a/cpp/src/dual_simplex/basis_solves.cpp
+++ b/cpp/src/dual_simplex/basis_solves.cpp
@@ -363,7 +363,7 @@ i_t factorize_basis(const csc_matrix_t<i_t, f_t>& A,
                                  S_perm_inv);
         if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
           settings.log.printf("Concurrent halt\n");
-          return -1;
+          return CONCURRENT_HALT_RETURN;
         }
         if (Srank != Sdim) {
           // Get the rank deficient columns
@@ -582,7 +582,7 @@ i_t factorize_basis(const csc_matrix_t<i_t, f_t>& A,
   }
   if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
     settings.log.printf("Concurrent halt\n");
-    return -1;
+    return CONCURRENT_HALT_RETURN;
   }
   if (verbose) {
     printf("Right Lnz+Unz %d t %.3f\n", L.col_start[m] + U.col_start[m], toc(fact_start));
diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index 2c781a5156..1d94f41c7f 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -1108,6 +1108,217 @@ i_t basis_update_t<i_t, f_t>::lower_triangular_multiply(const csc_matrix_t<i_t,
   return new_nz;
 }
 
+// Start of middle product form: basis_update_mpf_t
+// append_cuts() borders the factors with cuts_basic.m new rows: L = [L0 0; V I], U = [U0 0; 0 I].
+template <typename i_t, typename f_t>
+i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts_basic)
+{
+  const i_t m = L0_.m;  // dimension of the factors before the cuts are appended
+
+  // Solve for U^T W^T = C_B^T
+  // We do this one row at a time of C_B
+  csc_matrix_t<i_t, f_t> WT(m, cuts_basic.m, 0);
+
+  i_t WT_nz = 0;
+  for (i_t k = 0; k < cuts_basic.m; k++) {
+    sparse_vector_t<i_t, f_t> rhs(cuts_basic, k);
+    u_transpose_solve(rhs);
+    WT.col_start[k] = WT_nz;
+    for (i_t q = 0; q < rhs.i.size(); q++) {
+      WT.i.push_back(rhs.i[q]);
+      WT.x.push_back(rhs.x[q]);
+      WT_nz++;
+    }
+  }
+  WT.col_start[cuts_basic.m] = WT_nz;
+
+#ifdef CHECK_W
+  {
+    for (i_t k = 0; k < cuts_basic.m; k++) {
+      std::vector<f_t> WT_col(m, 0.0);
+      WT.load_a_column(k, WT_col);
+      std::vector<f_t> CBT_col(m, 0.0);
+      matrix_transpose_vector_multiply(U0_, 1.0, WT_col, 0.0, CBT_col);
+      sparse_vector_t<i_t, f_t> CBT_col_sparse(cuts_basic, k);
+      std::vector<f_t> CBT_col_dense(m);
+      CBT_col_sparse.to_dense(CBT_col_dense);
+      for (i_t h = 0; h < m; h++) {
+        if (std::abs(CBT_col_dense[h] - CBT_col[h]) > 1e-6) {
+          printf("W: col %d CBT_col_dense[%d] = %e CBT_col[%d] = %e\n",
+                 k,
+                 h,
+                 CBT_col_dense[h],
+                 h,
+                 CBT_col[h]);
+          exit(1);
+        }
+      }
+    }
+  }
+#endif
+
+  csc_matrix_t<i_t, f_t> V(cuts_basic.m, m, 0);
+  if (num_updates_ > 0) {
+    // W = V T_0 ... T_{num_updates_ - 1}
+    // or V = W T_{num_updates_ - 1}^{-1} ... T_0^{-1}
+    // or V^T = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T
+    // We can compute V^T column by column so that we have
+    // V^T(:, h) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h)
+    // or
+    // V(h, :) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h)
+    // So we can form V row by row in CSR and then convert it to CSC
+    // for appending to L0
+
+    csr_matrix_t<i_t, f_t> V_row(cuts_basic.m, m, 0);
+    i_t V_nz           = 0;
+    const f_t zero_tol = 1e-13;  // updates with |theta| at or below this are skipped
+    for (i_t h = 0; h < cuts_basic.m; h++) {
+      sparse_vector_t<i_t, f_t> rhs(WT, h);
+      scatter_into_workspace(rhs);
+      i_t nz = rhs.i.size();
+      for (i_t k = num_updates_ - 1; k >= 0; --k) {
+        // T_k^{-T} = ( I - v u^T/(1 + u^T v))
+        // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu
+
+        const i_t u_col = 2 * k;
+        const i_t v_col = 2 * k + 1;
+        const f_t mu    = mu_values_[k];
+
+        // dot = u^T * b
+        f_t dot         = dot_product(u_col, xi_workspace_, x_workspace_);
+        const f_t theta = dot / mu;
+        if (std::abs(theta) > zero_tol) {
+          add_sparse_column(S_, v_col, -theta, xi_workspace_, nz, x_workspace_);
+        }
+      }
+      gather_into_sparse_vector(nz, rhs);
+      V_row.row_start[h] = V_nz;
+      for (i_t q = 0; q < rhs.i.size(); q++) {
+        V_row.j.push_back(rhs.i[q]);
+        V_row.x.push_back(rhs.x[q]);
+        V_nz++;
+      }
+    }
+    V_row.row_start[cuts_basic.m] = V_nz;
+
+    V_row.to_compressed_col(V);
+
+#ifdef CHECK_V
+    csc_matrix_t<i_t, f_t> CB_col(cuts_basic.m, m, 0);
+    cuts_basic.to_compressed_col(CB_col);
+    for (i_t k = 0; k < m; k++) {
+      std::vector<f_t> U_col(m, 0.0);
+      U0_.load_a_column(k, U_col);
+      for (i_t h = num_updates_ - 1; h >= 0; --h) {
+        // T_h = ( I + u_h v_h^T)
+        // T_h * x = x + u_h * v_h^T * x = x + theta * u_h
+        const i_t u_col     = 2 * h;
+        const i_t v_col     = 2 * h + 1;
+        f_t theta           = dot_product(v_col, U_col);
+        const i_t col_start = S_.col_start[u_col];
+        const i_t col_end   = S_.col_start[u_col + 1];
+        for (i_t p = col_start; p < col_end; ++p) {
+          const i_t i = S_.i[p];
+          U_col[i] += theta * S_.x[p];
+        }
+      }
+      std::vector<f_t> CB_column(cuts_basic.m, 0.0);
+      matrix_vector_multiply(V, 1.0, U_col, 0.0, CB_column);
+      std::vector<f_t> CB_col_dense(cuts_basic.m);
+      CB_col.load_a_column(k, CB_col_dense);
+      for (i_t l = 0; l < cuts_basic.m; l++) {
+        if (std::abs(CB_col_dense[l] - CB_column[l]) > 1e-6) {
+          printf("V: col %d CB_col_dense[%d] = %e CB_column[%d] = %e\n",
+                 k,
+                 l,
+                 CB_col_dense[l],
+                 l,
+                 CB_column[l]);
+          exit(1);
+        }
+      }
+    }
+#endif
+  } else {
+    // W = V
+    WT.transpose(V);
+  }
+
+  // Extend u_i, v_i for i = 0, ..., num_updates_ - 1
+  S_.m += cuts_basic.m;
+
+  // Adjust L and U
+  // L = [ L0  0 ]
+  //     [ V   I ]
+
+  i_t V_nz = V.col_start[m];
+  i_t L_nz = L0_.col_start[m];
+  csc_matrix_t<i_t, f_t> new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m);
+  i_t predicted_nz = L_nz + V_nz + cuts_basic.m;
+  L_nz             = 0;
+  for (i_t j = 0; j < m; ++j) {
+    new_L.col_start[j]  = L_nz;
+    const i_t col_start = L0_.col_start[j];
+    const i_t col_end   = L0_.col_start[j + 1];
+    for (i_t p = col_start; p < col_end; ++p) {
+      new_L.i[L_nz] = L0_.i[p];
+      new_L.x[L_nz] = L0_.x[p];
+      L_nz++;
+    }
+    const i_t V_col_start = V.col_start[j];
+    const i_t V_col_end   = V.col_start[j + 1];
+    for (i_t p = V_col_start; p < V_col_end; ++p) {
+      new_L.i[L_nz] = V.i[p] + m;
+      new_L.x[L_nz] = V.x[p];
+      L_nz++;
+    }
+  }
+  for (i_t j = m; j < m + cuts_basic.m; ++j) {
+    new_L.col_start[j] = L_nz;
+    new_L.i[L_nz]      = j;
+    new_L.x[L_nz]      = 1.0;
+    L_nz++;
+  }
+  new_L.col_start[m + cuts_basic.m] = L_nz;
+  assert(L_nz == predicted_nz);
+
+  L0_ = new_L;
+
+  // Adjust U
+  // U = [ U0 0 ]
+  //     [ 0  I ]
+
+  i_t U_nz = U0_.col_start[m];
+  U0_.col_start.resize(m + cuts_basic.m + 1);
+  U0_.i.resize(U_nz + cuts_basic.m);
+  U0_.x.resize(U_nz + cuts_basic.m);
+  for (i_t k = m; k < m + cuts_basic.m; ++k) {
+    U0_.col_start[k] = U_nz;
+    U0_.i[U_nz]      = k;
+    U0_.x[U_nz]      = 1.0;
+    U_nz++;
+  }
+  U0_.col_start[m + cuts_basic.m] = U_nz;
+  U0_.n                           = m + cuts_basic.m;
+  U0_.m                           = m + cuts_basic.m;
+
+  compute_transposes();
+
+  // Adjust row_permutation_ and inverse_row_permutation_
+  row_permutation_.resize(m + cuts_basic.m);
+  inverse_row_permutation_.resize(m + cuts_basic.m);
+  for (i_t k = m; k < m + cuts_basic.m; ++k) {
+    row_permutation_[k] = k;
+  }
+  inverse_permutation(row_permutation_, inverse_row_permutation_);
+
+  // Adjust workspace sizes
+  xi_workspace_.resize(2 * (m + cuts_basic.m), 0);
+  x_workspace_.resize(m + cuts_basic.m, 0.0);
+
+  return 0;
+}
+
 template <typename i_t, typename f_t>
 void basis_update_mpf_t<i_t, f_t>::gather_into_sparse_vector(i_t nz,
                                                              sparse_vector_t<i_t, f_t>& out) const
@@ -2057,16 +2268,18 @@ int basis_update_mpf_t<i_t, f_t>::refactor_basis(
 
   if (L0_.m != A.m) { resize(A.m); }
   std::vector<i_t> q;
-  if (factorize_basis(A,
-                      settings,
-                      basic_list,
-                      L0_,
-                      U0_,
-                      row_permutation_,
-                      inverse_row_permutation_,
-                      q,
-                      deficient,
-                      slacks_needed) == -1) {
+  i_t status = factorize_basis(A,
+                               settings,
+                               basic_list,
+                               L0_,
+                               U0_,
+                               row_permutation_,
+                               inverse_row_permutation_,
+                               q,
+                               deficient,
+                               slacks_needed);
+  if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; }
+  if (status == -1) {
     settings.log.debug("Initial factorization failed\n");
     basis_repair(
       A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
@@ -2088,16 +2301,18 @@ int basis_update_mpf_t<i_t, f_t>::refactor_basis(
     }
 #endif
 
-    if (factorize_basis(A,
-                        settings,
-                        basic_list,
-                        L0_,
-                        U0_,
-                        row_permutation_,
-                        inverse_row_permutation_,
-                        q,
-                        deficient,
-                        slacks_needed) == -1) {
+    status = factorize_basis(A,
+                             settings,
+                             basic_list,
+                             L0_,
+                             U0_,
+                             row_permutation_,
+                             inverse_row_permutation_,
+                             q,
+                             deficient,
+                             slacks_needed);
+    if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; }
+    if (status == -1) {
 #ifdef CHECK_L_FACTOR
       if (L0_.check_matrix() == -1) { settings.log.printf("Bad L after basis repair\n"); }
 #endif
diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp
index afd4f4c9ab..8eca3ba8a9 100644
--- a/cpp/src/dual_simplex/basis_updates.hpp
+++ b/cpp/src/dual_simplex/basis_updates.hpp
@@ -291,6 +291,8 @@ class basis_update_mpf_t {
     reset_stats();
   }
 
+  i_t append_cuts(const csr_matrix_t<i_t, f_t>& cuts_basic);
+
   f_t estimate_solution_density(f_t rhs_nz, f_t sum, i_t& num_calls, bool& use_hypersparse) const
   {
     num_calls++;
diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp
index 8e58c24f01..fac65b8140 100644
--- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp
+++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -84,10 +84,7 @@ i_t bound_flipping_ratio_test_t<i_t, f_t>::single_pass(i_t start,
   step_length       = min_val;
   nonbasic_entering = candidate;
   // this should be temporary, find root causes where the candidate is not filled
-  if (nonbasic_entering == -1) {
-    // -1,-2 and -3 are reserved for other things
-    return -4;
-  }
+  if (nonbasic_entering == -1) { return RATIO_TEST_NUMERICAL_ISSUES; }
   const i_t j = entering_index = nonbasic_list_[nonbasic_entering];
 
   constexpr bool verbose = false;
@@ -123,16 +120,16 @@ i_t bound_flipping_ratio_test_t<i_t, f_t>::compute_step_length(f_t& step_length,
   if constexpr (verbose) { settings_.log.printf("Initial breakpoints %d\n", num_breakpoints); }
   if (num_breakpoints == 0) {
     nonbasic_entering = -1;
-    return -1;
+    return RATIO_TEST_NO_ENTERING_VARIABLE;
   }
 
   f_t slope          = slope_;
   nonbasic_entering  = -1;
-  i_t entering_index = -1;
+  i_t entering_index = RATIO_TEST_NO_ENTERING_VARIABLE;
 
   i_t k_idx = single_pass(
     0, num_breakpoints, indicies, ratios, slope, step_length, nonbasic_entering, entering_index);
-  if (k_idx == -4) { return -4; }
+  if (k_idx == RATIO_TEST_NUMERICAL_ISSUES) { return RATIO_TEST_NUMERICAL_ISSUES; }
   bool continue_search = k_idx >= 0 && num_breakpoints > 1 && slope > 0.0;
   if (!continue_search) {
     if constexpr (0) {
@@ -255,11 +252,11 @@ void bound_flipping_ratio_test_t<i_t, f_t>::heap_passes(const std::vector<i_t>&
     }
 
     if (toc(start_time_) > settings_.time_limit) {
-      entering_index = -2;
+      entering_index = RATIO_TEST_TIME_LIMIT;
       return;
     }
     if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
-      entering_index = -3;
+      entering_index = CONCURRENT_HALT_RETURN;
       return;
     }
   }
diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp
index d3164c623d..51b00b1097 100644
--- a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp
+++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -8,11 +8,17 @@
 
 #include <dual_simplex/initial_basis.hpp>
 #include <dual_simplex/simplex_solver_settings.hpp>
+#include <dual_simplex/types.hpp>
 
 #include <vector>
 
 namespace cuopt::linear_programming::dual_simplex {
 
+#define RATIO_TEST_NO_ENTERING_VARIABLE -1  // compute_step_length found no breakpoints
+#define RATIO_TEST_CONCURRENT_LIMIT     CONCURRENT_HALT_RETURN  // -2
+#define RATIO_TEST_TIME_LIMIT           -3  // heap_passes exceeded settings_.time_limit
+#define RATIO_TEST_NUMERICAL_ISSUES     -4  // single_pass ended without a candidate
+
 template <typename i_t, typename f_t>
 class bound_flipping_ratio_test_t {
  public:
diff --git a/cpp/src/dual_simplex/bounds_strengthening.cpp b/cpp/src/dual_simplex/bounds_strengthening.cpp
index 4114e7e097..2b20940d29 100644
--- a/cpp/src/dual_simplex/bounds_strengthening.cpp
+++ b/cpp/src/dual_simplex/bounds_strengthening.cpp
@@ -59,8 +59,7 @@ bounds_strengthening_t<i_t, f_t>::bounds_strengthening_t(
   const csr_matrix_t<i_t, f_t>& Arow,
   const std::vector<char>& row_sense,
   const std::vector<variable_type_t>& var_types)
-  : bounds_changed(problem.num_cols, false),
-    A(problem.A),
+  : A(problem.A),
     Arow(Arow),
     var_types(var_types),
     delta_min_activity(problem.num_rows),
@@ -91,9 +90,10 @@ bounds_strengthening_t<i_t, f_t>::bounds_strengthening_t(
 
 template <typename i_t, typename f_t>
 bool bounds_strengthening_t<i_t, f_t>::bounds_strengthening(
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  const std::vector<bool>& bounds_changed,
   std::vector<f_t>& lower_bounds,
-  std::vector<f_t>& upper_bounds,
-  const simplex_solver_settings_t<i_t, f_t>& settings)
+  std::vector<f_t>& upper_bounds)
 {
   const i_t m = A.m;
   const i_t n = A.n;
@@ -104,13 +104,13 @@ bool bounds_strengthening_t<i_t, f_t>::bounds_strengthening(
 
   if (!bounds_changed.empty()) {
     std::fill(constraint_changed.begin(), constraint_changed.end(), false);
-    for (i_t i = 0; i < n; ++i) {
-      if (bounds_changed[i]) {
-        const i_t row_start = A.col_start[i];
-        const i_t row_end   = A.col_start[i + 1];
-        for (i_t p = row_start; p < row_end; ++p) {
-          const i_t j           = A.i[p];
-          constraint_changed[j] = true;
+    for (i_t j = 0; j < n; ++j) {
+      if (bounds_changed[j]) {
+        const i_t col_start = A.col_start[j];
+        const i_t col_end   = A.col_start[j + 1];
+        for (i_t p = col_start; p < col_end; ++p) {
+          const i_t i           = A.i[p];
+          constraint_changed[i] = true;
         }
       }
     }
@@ -179,9 +179,9 @@ bool bounds_strengthening_t<i_t, f_t>::bounds_strengthening(
       f_t new_lb = old_lb;
       f_t new_ub = old_ub;
 
-      const i_t row_start = A.col_start[k];
-      const i_t row_end   = A.col_start[k + 1];
-      for (i_t p = row_start; p < row_end; ++p) {
+      const i_t col_start = A.col_start[k];
+      const i_t col_end   = A.col_start[k + 1];
+      for (i_t p = col_start; p < col_end; ++p) {
         const i_t i = A.i[p];
 
         if (!constraint_changed[i]) { continue; }
@@ -210,13 +210,13 @@ bool bounds_strengthening_t<i_t, f_t>::bounds_strengthening(
       new_lb = std::max(new_lb, lower_bounds[k]);
       new_ub = std::min(new_ub, upper_bounds[k]);
 
-      if (new_lb > new_ub + 1e-6) {
+      if (new_lb > new_ub + settings.primal_tol) {
         settings.log.debug(
           "Iter:: %d, Infeasible variable after update %d, %e > %e\n", iter, k, new_lb, new_ub);
         return false;
       }
       if (new_lb != old_lb || new_ub != old_ub) {
-        for (i_t p = row_start; p < row_end; ++p) {
+        for (i_t p = col_start; p < col_end; ++p) {
           const i_t i                = A.i[p];
           constraint_changed_next[i] = true;
         }
@@ -225,8 +225,8 @@ bool bounds_strengthening_t<i_t, f_t>::bounds_strengthening(
       lower[k] = std::min(new_lb, new_ub);
       upper[k] = std::max(new_lb, new_ub);
 
-      bool bounds_changed = lb_updated || ub_updated;
-      if (bounds_changed) { num_bounds_changed++; }
+      bool bounds_updated = lb_updated || ub_updated;
+      if (bounds_updated) { num_bounds_changed++; }
     }
 
     if (num_bounds_changed == 0) { break; }
diff --git a/cpp/src/dual_simplex/bounds_strengthening.hpp b/cpp/src/dual_simplex/bounds_strengthening.hpp
index e7e218b824..b811fb1c18 100644
--- a/cpp/src/dual_simplex/bounds_strengthening.hpp
+++ b/cpp/src/dual_simplex/bounds_strengthening.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -20,11 +20,12 @@ class bounds_strengthening_t {
                          const std::vector<char>& row_sense,
                          const std::vector<variable_type_t>& var_types);
 
-  bool bounds_strengthening(std::vector<f_t>& lower_bounds,
-                            std::vector<f_t>& upper_bounds,
-                            const simplex_solver_settings_t<i_t, f_t>& settings);
-
-  std::vector<bool> bounds_changed;
+  // If bounds_changed is empty, all constraints are scanned for changes.
+  // Otherwise, bounds_changed must be a vector of length n, where n is the number of variables.
+  bool bounds_strengthening(const simplex_solver_settings_t<i_t, f_t>& settings,
+                            const std::vector<bool>& bounds_changed,
+                            std::vector<f_t>& lower_bounds,
+                            std::vector<f_t>& upper_bounds);
 
  private:
   const csc_matrix_t<i_t, f_t>& A;
diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index acdc9888a9..7f6e2c1921 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -7,8 +7,10 @@
 
 #include <dual_simplex/branch_and_bound.hpp>
 
+#include <dual_simplex/basis_solves.hpp>
 #include <dual_simplex/bounds_strengthening.hpp>
 #include <dual_simplex/crossover.hpp>
+#include <dual_simplex/cuts.hpp>
 #include <dual_simplex/initial_basis.hpp>
 #include <dual_simplex/logger.hpp>
 #include <dual_simplex/mip_node.hpp>
@@ -122,9 +124,13 @@ bool check_guess(const lp_problem_t<i_t, f_t>& original_lp,
 }
 
 template <typename i_t, typename f_t>
-void set_uninitialized_steepest_edge_norms(std::vector<f_t>& edge_norms)
+void set_uninitialized_steepest_edge_norms(const lp_problem_t<i_t, f_t>& lp,
+                                           const std::vector<i_t>& basic_list,
+                                           std::vector<f_t>& edge_norms)
 {
-  for (i_t j = 0; j < edge_norms.size(); ++j) {
+  edge_norms.resize(lp.num_cols, -1.0);  // no-op when the size already equals lp.num_cols
+  for (i_t row = 0; row < lp.num_rows; ++row) {
+    const i_t j = basic_list[row];
     if (edge_norms[j] <= 0.0) { edge_norms[j] = 1e-4; }
   }
 }
@@ -225,7 +231,8 @@ inline char feasible_solution_symbol(bnb_worker_type_t type)
 template <typename i_t, typename f_t>
 branch_and_bound_t<i_t, f_t>::branch_and_bound_t(
   const user_problem_t<i_t, f_t>& user_problem,
-  const simplex_solver_settings_t<i_t, f_t>& solver_settings)
+  const simplex_solver_settings_t<i_t, f_t>& solver_settings,
+  f_t start_time)
   : original_problem_(user_problem),
     settings_(solver_settings),
     original_lp_(user_problem.handle_ptr, 1, 1, 1),
@@ -236,11 +243,36 @@ branch_and_bound_t<i_t, f_t>::branch_and_bound_t(
     pc_(1),
     solver_status_(mip_status_t::UNSET)
 {
-  exploration_stats_.start_time = tic();
+  exploration_stats_.start_time = start_time;
+#ifdef PRINT_CONSTRAINT_MATRIX
+  settings_.log.printf("A");
+  original_problem_.A.print_matrix();
+#endif
+
   dualize_info_t<i_t, f_t> dualize_info;
   convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info);
   full_variable_types(original_problem_, original_lp_, var_types_);
 
+  // Check slack
+#ifdef CHECK_SLACKS
+  assert(new_slacks_.size() == original_lp_.num_rows);
+  for (i_t slack : new_slacks_) {
+    const i_t col_start = original_lp_.A.col_start[slack];
+    const i_t col_end   = original_lp_.A.col_start[slack + 1];
+    const i_t col_len   = col_end - col_start;
+    if (col_len != 1) {
+      settings_.log.printf("Slack %d has %d nzs\n", slack, col_len);
+      assert(col_len == 1);
+    }
+    const i_t i = original_lp_.A.i[col_start];
+    const f_t x = original_lp_.A.x[col_start];
+    if (std::abs(x) != 1.0) {
+      settings_.log.printf("Slack %d row %d has non-unit coefficient %e\n", slack, i, x);
+      assert(std::abs(x) == 1.0);
+    }
+  }
+#endif
+
   upper_bound_ = inf;
 }
 
@@ -267,7 +299,7 @@ void branch_and_bound_t<i_t, f_t>::report_heuristic(f_t obj)
     std::string user_gap = user_mip_gap<f_t>(user_obj, user_lower);
 
     settings_.log.printf(
-      "H                            %+13.6e    %+10.6e                        %s %9.2f\n",
+      "H                            %+13.6e    %+10.6e                               %s %9.2f\n",
       user_obj,
       user_lower,
       user_gap.c_str(),
@@ -280,27 +312,90 @@ void branch_and_bound_t<i_t, f_t>::report_heuristic(f_t obj)
 }
 
 template <typename i_t, typename f_t>
-void branch_and_bound_t<i_t, f_t>::report(char symbol, f_t obj, f_t lower_bound, i_t node_depth)
+void branch_and_bound_t<i_t, f_t>::report(
+  char symbol, f_t obj, f_t lower_bound, i_t node_depth, i_t node_int_infeas)
 {
   update_user_bound(lower_bound);
-  i_t nodes_explored   = exploration_stats_.nodes_explored;
-  i_t nodes_unexplored = exploration_stats_.nodes_unexplored;
-  f_t user_obj         = compute_user_objective(original_lp_, obj);
-  f_t user_lower       = compute_user_objective(original_lp_, lower_bound);
-  f_t iter_node        = exploration_stats_.total_lp_iters / nodes_explored;
-  std::string user_gap = user_mip_gap<f_t>(user_obj, user_lower);
-  settings_.log.printf("%c %10d   %10lu    %+13.6e    %+10.6e  %6d    %7.1e     %s %9.2f\n",
+  const i_t nodes_explored   = exploration_stats_.nodes_explored;
+  const i_t nodes_unexplored = exploration_stats_.nodes_unexplored;
+  const f_t user_obj         = compute_user_objective(original_lp_, obj);
+  const f_t user_lower       = compute_user_objective(original_lp_, lower_bound);
+  const f_t iters            = static_cast<f_t>(exploration_stats_.total_lp_iters);
+  const f_t iter_node        = nodes_explored > 0 ? iters / nodes_explored : iters;
+  const std::string user_gap = user_mip_gap<f_t>(user_obj, user_lower);
+  settings_.log.printf("%c %10d   %10d    %+13.6e    %+10.6e   %6d %6d   %7.1e     %s %9.2f\n",
                        symbol,
                        nodes_explored,
                        nodes_unexplored,
                        user_obj,
                        user_lower,
+                       node_int_infeas,  // printed in the column ahead of node_depth
                        node_depth,
                        iter_node,
                        user_gap.c_str(),
                        toc(exploration_stats_.start_time));
 }
 
+// Reduced-cost strengthening: resets lower_bounds/upper_bounds to the original_lp_ bounds, then
+// tightens integer columns using the gap between upper_bound and the root relaxation objective.
+// Returns the number of integer columns whose implied bounds meet to within fixed_tol.
+template <typename i_t, typename f_t>
+i_t branch_and_bound_t<i_t, f_t>::find_reduced_cost_fixings(f_t upper_bound,
+                                                            std::vector<f_t>& lower_bounds,
+                                                            std::vector<f_t>& upper_bounds)
+{
+  const std::vector<f_t>& reduced_costs = root_relax_soln_.z;  // read-only, so no copy is needed
+  lower_bounds                          = original_lp_.lower;
+  upper_bounds                          = original_lp_.upper;
+  const f_t root_obj  = compute_objective(original_lp_, root_relax_soln_.x);
+  const f_t abs_gap   = upper_bound - root_obj;  // same for every column, so computed once
+  const f_t threshold = 100.0 * settings_.integer_tol;
+  const f_t weaken    = settings_.integer_tol;
+  const f_t fixed_tol = settings_.fixed_tol;
+  i_t num_improved    = 0;
+  i_t num_fixed       = 0;
+  // z can have fewer entries than original_lp_ has columns because slacks were added for cuts
+  const i_t num_cols_to_check = static_cast<i_t>(reduced_costs.size());
+  for (i_t j = 0; j < num_cols_to_check; j++) {
+    if (std::isfinite(reduced_costs[j]) && std::abs(reduced_costs[j]) > threshold) {
+      const f_t lower_j            = original_lp_.lower[j];
+      const f_t upper_j            = original_lp_.upper[j];
+      f_t reduced_cost_upper_bound = upper_j;
+      f_t reduced_cost_lower_bound = lower_j;
+      if (lower_j > -inf && reduced_costs[j] > 0) {
+        const f_t new_upper_bound = lower_j + abs_gap / reduced_costs[j];
+        reduced_cost_upper_bound  = var_types_[j] == variable_type_t::INTEGER
+                                      ? std::floor(new_upper_bound + weaken)
+                                      : new_upper_bound;
+        if (reduced_cost_upper_bound < upper_j && var_types_[j] == variable_type_t::INTEGER) {
+          num_improved++;
+          upper_bounds[j] = reduced_cost_upper_bound;
+        }
+      }
+      if (upper_j < inf && reduced_costs[j] < 0) {
+        const f_t new_lower_bound = upper_j + abs_gap / reduced_costs[j];
+        reduced_cost_lower_bound  = var_types_[j] == variable_type_t::INTEGER
+                                      ? std::ceil(new_lower_bound - weaken)
+                                      : new_lower_bound;
+        if (reduced_cost_lower_bound > lower_j && var_types_[j] == variable_type_t::INTEGER) {
+          num_improved++;
+          lower_bounds[j] = reduced_cost_lower_bound;
+        }
+      }
+      if (var_types_[j] == variable_type_t::INTEGER &&
+          reduced_cost_upper_bound <= reduced_cost_lower_bound + fixed_tol) {
+        num_fixed++;
+      }
+    }
+  }
+
+  if (num_fixed > 0 || num_improved > 0) {
+    settings_.log.printf(
+      "Reduced costs: Found %d improved bounds and %d fixed variables\n", num_improved, num_fixed);
+  }
+  return num_fixed;
+}
+
 template <typename i_t, typename f_t>
 void branch_and_bound_t<i_t, f_t>::update_user_bound(f_t lower_bound)
 {
@@ -312,6 +407,7 @@ void branch_and_bound_t<i_t, f_t>::update_user_bound(f_t lower_bound)
 template <typename i_t, typename f_t>
 void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solution)
 {
+  mutex_original_lp_.lock();
   if (solution.size() != original_problem_.num_cols) {
     settings_.log.printf(
       "Solution size mismatch %ld %d\n", solution.size(), original_problem_.num_cols);
@@ -319,17 +415,29 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
   std::vector<f_t> crushed_solution;
   crush_primal_solution<i_t, f_t>(
     original_problem_, original_lp_, solution, new_slacks_, crushed_solution);
-  f_t obj             = compute_objective(original_lp_, crushed_solution);
+  f_t obj = compute_objective(original_lp_, crushed_solution);
+  mutex_original_lp_.unlock();
   bool is_feasible    = false;
   bool attempt_repair = false;
   mutex_upper_.lock();
-  if (obj < upper_bound_) {
+  f_t current_upper_bound = upper_bound_;
+  mutex_upper_.unlock();
+  if (obj < current_upper_bound) {
     f_t primal_err;
     f_t bound_err;
     i_t num_fractional;
+    mutex_original_lp_.lock();
+    if (crushed_solution.size() != original_lp_.num_cols) {
+      // original problem has been modified since the solution was crushed
+      // we need to re-crush the solution
+      crush_primal_solution<i_t, f_t>(
+        original_problem_, original_lp_, solution, new_slacks_, crushed_solution);
+    }
     is_feasible = check_guess(
       original_lp_, settings_, var_types_, crushed_solution, primal_err, bound_err, num_fractional);
-    if (is_feasible) {
+    mutex_original_lp_.unlock();
+    mutex_upper_.lock();
+    if (is_feasible && obj < upper_bound_) {
       upper_bound_ = obj;
       incumbent_.set_incumbent_solution(obj, crushed_solution);
     } else {
@@ -344,10 +452,11 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
           num_fractional);
       }
     }
+    mutex_upper_.unlock();
   } else {
     settings_.log.debug("Solution objective not better than current upper_bound_. Not accepted.\n");
   }
-  mutex_upper_.unlock();
+
   if (is_feasible) { report_heuristic(obj); }
   if (attempt_repair) {
     mutex_repair_.lock();
@@ -459,6 +568,35 @@ void branch_and_bound_t<i_t, f_t>::repair_heuristic_solutions()
   }
 }
 
+template <typename i_t, typename f_t>
+void branch_and_bound_t<i_t, f_t>::set_solution_at_root(mip_solution_t<i_t, f_t>& solution,
+                                                        const cut_info_t<i_t, f_t>& cut_info)
+{
+  // Adopt the root relaxation as the incumbent and upper bound while holding mutex_upper_.
+  mutex_upper_.lock();
+  incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x);
+  upper_bound_ = root_objective_;
+  mutex_upper_.unlock();
+  print_cut_info(settings_, cut_info);
+
+  // Fill the caller's solution from the incumbent; no nodes were explored beyond the root.
+  uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x);
+  solution.simplex_iterations = root_relax_soln_.iterations;
+  solution.nodes_explored     = 0;
+  solution.lower_bound        = root_objective_;
+  solution.objective          = incumbent_.objective;
+  const auto user_obj = compute_user_objective(original_lp_, root_objective_);
+  const auto elapsed  = toc(exploration_stats_.start_time);
+  settings_.log.printf(
+    "Optimal solution found at root node. Objective %.16e. Time %.2f.\n", user_obj, elapsed);
+  // Invoke whichever of the optional callbacks in settings_ are set, solution callback first.
+  if (settings_.solution_callback != nullptr) {
+    settings_.solution_callback(solution.x, solution.objective);
+  }
+  if (settings_.heuristic_preemption_callback != nullptr) {
+    settings_.heuristic_preemption_callback();
+  }
+}
+
 template <typename i_t, typename f_t>
 void branch_and_bound_t<i_t, f_t>::set_final_solution(mip_solution_t<i_t, f_t>& solution,
                                                       f_t lower_bound)
@@ -491,6 +629,9 @@ void branch_and_bound_t<i_t, f_t>::set_final_solution(mip_solution_t<i_t, f_t>&
 
   if (gap <= settings_.absolute_mip_gap_tol || gap_rel <= settings_.relative_mip_gap_tol) {
     solver_status_ = mip_status_t::OPTIMAL;
+#ifdef CHECK_CUTS_AGAINST_SAVED_SOLUTION
+    if (settings_.sub_mip == 0) { write_solution_for_cut_verification(original_lp_, incumbent_.x); }
+#endif
     if (gap > 0 && gap <= settings_.absolute_mip_gap_tol) {
       settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n",
                            settings_.absolute_mip_gap_tol);
@@ -542,7 +683,7 @@ void branch_and_bound_t<i_t, f_t>::add_feasible_solution(f_t leaf_objective,
   if (leaf_objective < upper_bound_) {
     incumbent_.set_incumbent_solution(leaf_objective, leaf_solution);
     upper_bound_ = leaf_objective;
-    report(feasible_solution_symbol(thread_type), leaf_objective, get_lower_bound(), leaf_depth);
+    report(feasible_solution_symbol(thread_type), leaf_objective, get_lower_bound(), leaf_depth, 0);
     send_solution = true;
   }
 
@@ -621,11 +762,40 @@ branch_variable_t<i_t> branch_and_bound_t<i_t, f_t>::variable_selection(
   }
 }
 
+template <typename i_t, typename f_t>
+void branch_and_bound_t<i_t, f_t>::initialize_diving_heuristics_settings(
+  std::vector<bnb_worker_type_t>& diving_strategies)
+{
+  // Appends one worker type per diving heuristic whose setting is nonzero, in the fixed order
+  // pseudocost, line search, guided, coefficient. `diving_strategies` is not cleared first.
+  const auto& diving_cfg = settings_.diving_settings;
+  diving_strategies.reserve(4);
+
+  if (diving_cfg.pseudocost_diving != 0) {
+    diving_strategies.push_back(bnb_worker_type_t::PSEUDOCOST_DIVING);
+  }
+  if (diving_cfg.line_search_diving != 0) {
+    diving_strategies.push_back(bnb_worker_type_t::LINE_SEARCH_DIVING);
+  }
+  if (diving_cfg.guided_diving != 0) {
+    diving_strategies.push_back(bnb_worker_type_t::GUIDED_DIVING);
+  }
+  if (diving_cfg.coefficient_diving != 0) {
+    diving_strategies.push_back(bnb_worker_type_t::COEFFICIENT_DIVING);
+    // Only this strategy also runs the variable-lock computation on original_lp_.
+    calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_);
+  }
+
+  const bool none_enabled = diving_strategies.empty();
+  if (none_enabled) { settings_.log.printf("Warning: All diving heuristics are disabled!\n"); }
+}
+
 template <typename i_t, typename f_t>
 dual::status_t branch_and_bound_t<i_t, f_t>::solve_node_lp(
   mip_node_t<i_t, f_t>* node_ptr,
   lp_problem_t<i_t, f_t>& leaf_problem,
   lp_solution_t<i_t, f_t>& leaf_solution,
+  std::vector<f_t>& leaf_edge_norms,
   basis_update_mpf_t<i_t, f_t>& basis_factors,
   std::vector<i_t>& basic_list,
   std::vector<i_t>& nonbasic_list,
@@ -637,6 +807,41 @@ dual::status_t branch_and_bound_t<i_t, f_t>::solve_node_lp(
   bnb_stats_t<i_t, f_t>& stats,
   logger_t& log)
 {
+#ifdef DEBUG_BRANCHING
+  i_t num_integer_variables = 0;
+  for (i_t j = 0; j < original_lp_.num_cols; j++) {
+    if (var_types_[j] == variable_type_t::INTEGER) { num_integer_variables++; }
+  }
+  if (node_ptr->depth > num_integer_variables) {
+    std::vector<i_t> branched_variables(original_lp_.num_cols, 0);
+    std::vector<f_t> branched_lower(original_lp_.num_cols, std::numeric_limits<f_t>::quiet_NaN());
+    std::vector<f_t> branched_upper(original_lp_.num_cols, std::numeric_limits<f_t>::quiet_NaN());
+    mip_node_t<i_t, f_t>* parent = node_ptr->parent;
+    while (parent != nullptr) {
+      if (original_lp_.lower[parent->branch_var] != 0.0 ||
+          original_lp_.upper[parent->branch_var] != 1.0) {
+        break;
+      }
+      if (branched_variables[parent->branch_var] == 1) {
+        printf(
+          "Variable %d already branched. Previous lower %e upper %e. Current lower %e upper %e.\n",
+          parent->branch_var,
+          branched_lower[parent->branch_var],
+          branched_upper[parent->branch_var],
+          parent->branch_var_lower,
+          parent->branch_var_upper);
+      }
+      branched_variables[parent->branch_var] = 1;
+      branched_lower[parent->branch_var]     = parent->branch_var_lower;
+      branched_upper[parent->branch_var]     = parent->branch_var_upper;
+      parent                                 = parent->parent;
+    }
+    if (parent == nullptr) {
+      printf("Depth %d > num_integer_variables %d\n", node_ptr->depth, num_integer_variables);
+    }
+  }
+#endif
+
   std::vector<variable_status_t>& leaf_vstatus = node_ptr->vstatus;
   assert(leaf_vstatus.size() == leaf_problem.num_cols);
 
@@ -678,29 +883,27 @@ dual::status_t branch_and_bound_t<i_t, f_t>::solve_node_lp(
 #endif
 
   // Reset the bound_changed markers
-  std::fill(node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false);
+  std::vector<bool> bounds_changed(original_lp_.num_cols, false);
 
   // Set the correct bounds for the leaf problem
   if (recompute_bounds_and_basis) {
     leaf_problem.lower = root_lower;
     leaf_problem.upper = root_upper;
-    node_ptr->get_variable_bounds(
-      leaf_problem.lower, leaf_problem.upper, node_presolver.bounds_changed);
+    node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed);
 
   } else {
     node_ptr->update_branched_variable_bounds(
-      leaf_problem.lower, leaf_problem.upper, node_presolver.bounds_changed);
+      leaf_problem.lower, leaf_problem.upper, bounds_changed);
   }
 
-  bool feasible =
-    node_presolver.bounds_strengthening(leaf_problem.lower, leaf_problem.upper, lp_settings);
+  bool feasible = node_presolver.bounds_strengthening(
+    lp_settings, bounds_changed, leaf_problem.lower, leaf_problem.upper);
 
   dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED;
 
   if (feasible) {
-    i_t node_iter                    = 0;
-    f_t lp_start_time                = tic();
-    std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
+    i_t node_iter     = 0;
+    f_t lp_start_time = tic();
 
     lp_status = dual_phase2_with_advanced_basis(2,
                                                 0,
@@ -717,7 +920,7 @@ dual::status_t branch_and_bound_t<i_t, f_t>::solve_node_lp(
                                                 leaf_edge_norms);
 
     if (lp_status == dual::status_t::NUMERICAL) {
-      log.printf("Numerical issue node %d. Resolving from scratch.\n", node_ptr->node_id);
+      log.debug("Numerical issue node %d. Resolving from scratch.\n", node_ptr->node_id);
       lp_status_t second_status = solve_linear_program_with_advanced_basis(leaf_problem,
                                                                            lp_start_time,
                                                                            lp_settings,
@@ -748,6 +951,7 @@ std::pair<node_status_t, rounding_direction_t> branch_and_bound_t<i_t, f_t>::upd
   search_tree_t<i_t, f_t>& search_tree,
   lp_problem_t<i_t, f_t>& leaf_problem,
   lp_solution_t<i_t, f_t>& leaf_solution,
+  std::vector<f_t>& leaf_edge_norms,
   bnb_worker_type_t thread_type,
   dual::status_t lp_status,
   logger_t& log)
@@ -776,6 +980,23 @@ std::pair<node_status_t, rounding_direction_t> branch_and_bound_t<i_t, f_t>::upd
     i_t leaf_num_fractional =
       fractional_variables(settings_, leaf_solution.x, var_types_, leaf_fractional);
 
+#ifdef DEBUG_FRACTIONAL_FIXED
+    // Debug aid: report every fractional variable whose leaf bounds coincide (lower == upper).
+    for (i_t j : leaf_fractional) {
+      if (leaf_problem.lower[j] == leaf_problem.upper[j]) {
+        printf(
+          "Node %d: Fixed variable %d has a fractional value %e. Lower %e upper %e. Variable "
+          "status %d\n",
+          node_ptr->node_id,
+          j,
+          leaf_solution.x[j],
+          leaf_problem.lower[j],
+          leaf_problem.upper[j],
+          static_cast<int>(leaf_vstatus[j]));  // %d needs an int; convert the enum explicitly
+      }
+    }
+#endif
+
     f_t leaf_objective    = compute_objective(leaf_problem, leaf_solution.x);
     node_ptr->lower_bound = leaf_objective;
     search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective);
@@ -815,8 +1036,13 @@ std::pair<node_status_t, rounding_direction_t> branch_and_bound_t<i_t, f_t>::upd
           pc_.obj_estimate(leaf_fractional, leaf_solution.x, node_ptr->lower_bound, pc_log);
       }
 
-      search_tree.branch(
-        node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus, leaf_problem, log);
+      search_tree.branch(node_ptr,
+                         branch_var,
+                         leaf_solution.x[branch_var],
+                         leaf_num_fractional,
+                         leaf_vstatus,
+                         leaf_problem,
+                         log);
       search_tree.update(node_ptr, node_status_t::HAS_CHILDREN);
       return {node_status_t::HAS_CHILDREN, round_dir};
 
@@ -877,7 +1103,7 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(mip_node_t<i_t, f_t>* nod
     bool should_report = should_report_.exchange(false);
 
     if (should_report) {
-      report(' ', upper_bound, root_objective_, node->depth);
+      report(' ', upper_bound, root_objective_, node->depth, node->integer_infeasible);
       exploration_stats_.nodes_since_last_log = 0;
       exploration_stats_.last_log             = tic();
       should_report_                          = true;
@@ -900,9 +1126,11 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(mip_node_t<i_t, f_t>* nod
   std::vector<i_t> nonbasic_list;
 
   lp_solution_t<i_t, f_t> leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols);
-  dual::status_t lp_status = solve_node_lp(node,
+  std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
+  dual::status_t lp_status         = solve_node_lp(node,
                                            leaf_problem,
                                            leaf_solution,
+                                           leaf_edge_norms,
                                            basis_factors,
                                            basic_list,
                                            nonbasic_list,
@@ -926,6 +1154,7 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(mip_node_t<i_t, f_t>* nod
                                               search_tree_,
                                               leaf_problem,
                                               leaf_solution,
+                                              leaf_edge_norms,
                                               bnb_worker_type_t::BEST_FIRST,
                                               lp_status,
                                               settings_.log);
@@ -1000,7 +1229,7 @@ void branch_and_bound_t<i_t, f_t>::plunge_from(i_t task_id,
             abs_gap < 10 * settings_.absolute_mip_gap_tol) &&
            time_since_last_log >= 1) ||
           (time_since_last_log > 30) || now > settings_.time_limit) {
-        report(' ', upper_bound, get_lower_bound(), node_ptr->depth);
+        report(' ', upper_bound, get_lower_bound(), node_ptr->depth, node_ptr->integer_infeasible);
         exploration_stats_.last_log             = tic();
         exploration_stats_.nodes_since_last_log = 0;
       }
@@ -1016,9 +1245,11 @@ void branch_and_bound_t<i_t, f_t>::plunge_from(i_t task_id,
     }
 
     lp_solution_t<i_t, f_t> leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols);
-    dual::status_t lp_status = solve_node_lp(node_ptr,
+    std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
+    dual::status_t lp_status         = solve_node_lp(node_ptr,
                                              leaf_problem,
                                              leaf_solution,
+                                             leaf_edge_norms,
                                              basis_factors,
                                              basic_list,
                                              nonbasic_list,
@@ -1045,6 +1276,7 @@ void branch_and_bound_t<i_t, f_t>::plunge_from(i_t task_id,
                                                 search_tree_,
                                                 leaf_problem,
                                                 leaf_solution,
+                                                leaf_edge_norms,
                                                 bnb_worker_type_t::BEST_FIRST,
                                                 lp_status,
                                                 settings_.log);
@@ -1187,9 +1419,11 @@ void branch_and_bound_t<i_t, f_t>::dive_from(mip_node_t<i_t, f_t>& start_node,
     if (dive_stats.nodes_explored > diving_node_limit) { break; }
 
     lp_solution_t<i_t, f_t> leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols);
-    dual::status_t lp_status = solve_node_lp(node_ptr,
+    std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
+    dual::status_t lp_status         = solve_node_lp(node_ptr,
                                              leaf_problem,
                                              leaf_solution,
+                                             leaf_edge_norms,
                                              basis_factors,
                                              basic_list,
                                              nonbasic_list,
@@ -1210,9 +1444,15 @@ void branch_and_bound_t<i_t, f_t>::dive_from(mip_node_t<i_t, f_t>& start_node,
 
     ++dive_stats.nodes_explored;
 
-    auto [node_status, round_dir] =
-      update_tree(node_ptr, dive_tree, leaf_problem, leaf_solution, diving_type, lp_status, log);
-    recompute_bounds_and_basis = node_status != node_status_t::HAS_CHILDREN;
+    auto [node_status, round_dir] = update_tree(node_ptr,
+                                                dive_tree,
+                                                leaf_problem,
+                                                leaf_solution,
+                                                leaf_edge_norms,
+                                                diving_type,
+                                                lp_status,
+                                                log);
+    recompute_bounds_and_basis    = node_status != node_status_t::HAS_CHILDREN;
 
     if (node_status == node_status_t::HAS_CHILDREN) {
       if (round_dir == rounding_direction_t::UP) {
@@ -1239,6 +1479,7 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(bnb_worker_type_t diving_type)
   lp_problem_t<i_t, f_t> leaf_problem = original_lp_;
   std::vector<char> row_sense;
   bounds_strengthening_t<i_t, f_t> node_presolver(leaf_problem, Arow_, row_sense, var_types_);
+  std::vector<bool> bounds_changed(original_lp_.num_cols, false);
 
   const i_t m = leaf_problem.num_rows;
   basis_update_mpf_t<i_t, f_t> basis_factors(m, settings_.refactor_frequency);
@@ -1254,7 +1495,7 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(bnb_worker_type_t diving_type)
     if (reset_starting_bounds) {
       start_lower = original_lp_.lower;
       start_upper = original_lp_.upper;
-      std::fill(node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false);
+      std::fill(bounds_changed.begin(), bounds_changed.end(), false);
       reset_starting_bounds = false;
     }
 
@@ -1268,8 +1509,7 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(bnb_worker_type_t diving_type)
     std::optional<mip_node_t<i_t, f_t>> start_node = std::nullopt;
 
     if (node_ptr.has_value()) {
-      node_ptr.value()->get_variable_bounds(
-        start_lower, start_upper, node_presolver.bounds_changed);
+      node_ptr.value()->get_variable_bounds(start_lower, start_upper, bounds_changed);
       start_node = node_ptr.value()->detach_copy();
     }
     node_queue_.unlock();
@@ -1278,7 +1518,8 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(bnb_worker_type_t diving_type)
       reset_starting_bounds = true;
 
       if (upper_bound_ < start_node->lower_bound) { continue; }
-      bool is_feasible = node_presolver.bounds_strengthening(start_lower, start_upper, settings_);
+      bool is_feasible =
+        node_presolver.bounds_strengthening(settings_, bounds_changed, start_lower, start_upper);
       if (!is_feasible) { continue; }
 
       dive_from(start_node.value(),
@@ -1296,7 +1537,13 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(bnb_worker_type_t diving_type)
 
 template <typename i_t, typename f_t>
 lp_status_t branch_and_bound_t<i_t, f_t>::solve_root_relaxation(
-  simplex_solver_settings_t<i_t, f_t> const& lp_settings)
+  simplex_solver_settings_t<i_t, f_t> const& lp_settings,
+  lp_solution_t<i_t, f_t>& root_relax_soln,
+  std::vector<variable_status_t>& root_vstatus,
+  basis_update_mpf_t<i_t, f_t>& basis_update,
+  std::vector<i_t>& basic_list,
+  std::vector<i_t>& nonbasic_list,
+  std::vector<f_t>& edge_norms)
 {
   f_t start_time          = tic();
   f_t user_objective      = 0;
@@ -1307,13 +1554,16 @@ lp_status_t branch_and_bound_t<i_t, f_t>::solve_root_relaxation(
   lp_status_t root_status;
   std::future<lp_status_t> root_status_future;
   root_status_future = std::async(std::launch::async,
-                                  &solve_linear_program_advanced<i_t, f_t>,
+                                  &solve_linear_program_with_advanced_basis<i_t, f_t>,
                                   std::ref(original_lp_),
                                   exploration_stats_.start_time,
                                   std::ref(lp_settings),
-                                  std::ref(root_relax_soln_),
-                                  std::ref(root_vstatus_),
-                                  std::ref(edge_norms_));
+                                  std::ref(root_relax_soln),
+                                  std::ref(basis_update),
+                                  std::ref(basic_list),
+                                  std::ref(nonbasic_list),
+                                  std::ref(root_vstatus),
+                                  std::ref(edge_norms));
   // Wait for the root relaxation solution to be sent by the diversity manager or dual simplex
   // to finish
   while (!root_crossover_solution_set_.load(std::memory_order_acquire) &&
@@ -1355,17 +1605,55 @@ lp_status_t branch_and_bound_t<i_t, f_t>::solve_root_relaxation(
 
     // Check if crossover was stopped by dual simplex
     if (crossover_status == crossover_status_t::OPTIMAL) {
-      set_root_concurrent_halt(1);  // Stop dual simplex
-      root_status = root_status_future.get();
-
+      set_root_concurrent_halt(1);             // Stop dual simplex
+      root_status = root_status_future.get();  // Wait for dual simplex to finish
+      set_root_concurrent_halt(0);             // Clear the concurrent halt flag
       // Override the root relaxation solution with the crossover solution
-      root_relax_soln_ = root_crossover_soln_;
-      root_vstatus_    = crossover_vstatus_;
-      root_status      = lp_status_t::OPTIMAL;
-      user_objective   = root_crossover_soln_.user_objective;
-      iter             = root_crossover_soln_.iterations;
-      solver_name      = "Barrier/PDLP and Crossover";
+      root_relax_soln = root_crossover_soln_;
+      root_vstatus    = crossover_vstatus_;
+      root_status     = lp_status_t::OPTIMAL;
+      basic_list.clear();
+      nonbasic_list.reserve(original_lp_.num_cols - original_lp_.num_rows);
+      nonbasic_list.clear();
+      // Rebuild basic_list / nonbasic_list by scanning the crossover vstatus in column order
+      for (i_t j = 0; j < original_lp_.num_cols; j++) {
+        if (crossover_vstatus_[j] == variable_status_t::BASIC) {
+          basic_list.push_back(j);
+        } else {
+          nonbasic_list.push_back(j);
+        }
+      }
+      if (basic_list.size() != original_lp_.num_rows) {  // size() is size_t: logged with %zu
+        settings_.log.printf(
+          "basic_list size %zu != m %d\n", basic_list.size(), original_lp_.num_rows);
+        assert(basic_list.size() == original_lp_.num_rows);
+      }
+      if (nonbasic_list.size() != original_lp_.num_cols - original_lp_.num_rows) {
+        settings_.log.printf("nonbasic_list size %zu != n - m %d\n",
+                             nonbasic_list.size(),
+                             original_lp_.num_cols - original_lp_.num_rows);
+        assert(nonbasic_list.size() == original_lp_.num_cols - original_lp_.num_rows);
+      }
+      // Refactor basis_update for the crossover basis; a nonzero return is treated as a failure
+      i_t refactor_status = basis_update.refactor_basis(original_lp_.A,
+                                                        root_crossover_settings,
+                                                        original_lp_.lower,
+                                                        original_lp_.upper,
+                                                        basic_list,
+                                                        nonbasic_list,
+                                                        crossover_vstatus_);
+      if (refactor_status != 0) {
+        settings_.log.printf("Failed to refactor basis. %d deficient columns.\n", refactor_status);
+        assert(refactor_status == 0);
+        root_status = lp_status_t::NUMERICAL_ISSUES;
+      }
 
+      // Set the edge norms to a default value
+      edge_norms.resize(original_lp_.num_cols, -1.0);
+      set_uninitialized_steepest_edge_norms<i_t, f_t>(original_lp_, basic_list, edge_norms);
+      user_objective = root_crossover_soln_.user_objective;
+      iter           = root_crossover_soln_.iterations;
+      solver_name    = "Barrier/PDLP and Crossover";
     } else {
       root_status    = root_status_future.get();
       user_objective = root_relax_soln_.user_objective;
@@ -1410,28 +1698,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   original_lp_.A.to_compressed_row(Arow_);
 
   std::vector<bnb_worker_type_t> diving_strategies;
-  diving_strategies.reserve(4);
-
-  if (settings_.diving_settings.pseudocost_diving != 0) {
-    diving_strategies.push_back(bnb_worker_type_t::PSEUDOCOST_DIVING);
-  }
-
-  if (settings_.diving_settings.line_search_diving != 0) {
-    diving_strategies.push_back(bnb_worker_type_t::LINE_SEARCH_DIVING);
-  }
-
-  if (settings_.diving_settings.guided_diving != 0) {
-    diving_strategies.push_back(bnb_worker_type_t::GUIDED_DIVING);
-  }
-
-  if (settings_.diving_settings.coefficient_diving != 0) {
-    diving_strategies.push_back(bnb_worker_type_t::COEFFICIENT_DIVING);
-    calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_);
-  }
-
-  if (diving_strategies.empty()) {
-    settings_.log.printf("Warning: All diving heuristics are disabled!\n");
-  }
+  initialize_diving_heuristics_settings(diving_strategies);
 
   if (guess_.size() != 0) {
     std::vector<f_t> crushed_guess;
@@ -1452,25 +1719,37 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
   root_relax_soln_.resize(original_lp_.num_rows, original_lp_.num_cols);
 
-  lp_status_t root_status;
+  i_t original_rows                     = original_lp_.num_rows;
   simplex_solver_settings_t lp_settings = settings_;
   lp_settings.inside_mip                = 1;
+  lp_settings.scale_columns             = false;
   lp_settings.concurrent_halt           = get_root_concurrent_halt();
-  // RINS/SUBMIP path
+  std::vector<i_t> basic_list(original_lp_.num_rows);
+  std::vector<i_t> nonbasic_list;
+  basis_update_mpf_t<i_t, f_t> basis_update(original_lp_.num_rows, settings_.refactor_frequency);
+  lp_status_t root_status;
   if (!enable_concurrent_lp_root_solve()) {
+    // RINS/SUBMIP path
     settings_.log.printf("\nSolving LP root relaxation with dual simplex\n");
-    root_status = solve_linear_program_advanced(original_lp_,
-                                                exploration_stats_.start_time,
-                                                lp_settings,
-                                                root_relax_soln_,
-                                                root_vstatus_,
-                                                edge_norms_);
-
+    root_status = solve_linear_program_with_advanced_basis(original_lp_,
+                                                           exploration_stats_.start_time,
+                                                           lp_settings,
+                                                           root_relax_soln_,
+                                                           basis_update,
+                                                           basic_list,
+                                                           nonbasic_list,
+                                                           root_vstatus_,
+                                                           edge_norms_);
   } else {
     settings_.log.printf("\nSolving LP root relaxation in concurrent mode\n");
-    root_status = solve_root_relaxation(lp_settings);
+    root_status = solve_root_relaxation(lp_settings,
+                                        root_relax_soln_,
+                                        root_vstatus_,
+                                        basis_update,
+                                        basic_list,
+                                        nonbasic_list,
+                                        edge_norms_);
   }
-
   exploration_stats_.total_lp_iters      = root_relax_soln_.iterations;
   exploration_stats_.total_lp_solve_time = toc(exploration_stats_.start_time);
 
@@ -1491,15 +1770,19 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
     }
     return mip_status_t::UNBOUNDED;
   }
-
   if (root_status == lp_status_t::TIME_LIMIT) {
     solver_status_ = mip_status_t::TIME_LIMIT;
     set_final_solution(solution, -inf);
     return solver_status_;
   }
+  if (root_status == lp_status_t::NUMERICAL_ISSUES) {
+    solver_status_ = mip_status_t::NUMERICAL;
+    set_final_solution(solution, -inf);
+    return solver_status_;
+  }
 
   assert(root_vstatus_.size() == original_lp_.num_cols);
-  set_uninitialized_steepest_edge_norms<i_t, f_t>(edge_norms_);
+  set_uninitialized_steepest_edge_norms<i_t, f_t>(original_lp_, basic_list, edge_norms_);
 
   root_objective_ = compute_objective(original_lp_, root_relax_soln_.x);
   local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_);
@@ -1520,33 +1803,280 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   }
 
   std::vector<i_t> fractional;
-  const i_t num_fractional =
-    fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
+  i_t num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
 
+  cut_info_t<i_t, f_t> cut_info;
   if (num_fractional == 0) {
-    mutex_upper_.lock();
-    incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x);
-    upper_bound_ = root_objective_;
-    mutex_upper_.unlock();
-    // We should be done here
-    uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x);
-    solution.objective          = incumbent_.objective;
-    solution.lower_bound        = root_objective_;
-    solution.nodes_explored     = 0;
-    solution.simplex_iterations = root_relax_soln_.iterations;
-    settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n",
-                         compute_user_objective(original_lp_, root_objective_),
-                         toc(exploration_stats_.start_time));
+    set_solution_at_root(solution, cut_info);
+    return mip_status_t::OPTIMAL;
+  }
 
-    if (settings_.solution_callback != nullptr) {
-      settings_.solution_callback(solution.x, solution.objective);
-    }
-    if (settings_.heuristic_preemption_callback != nullptr) {
-      settings_.heuristic_preemption_callback();
+  is_running           = true;
+  lower_bound_ceiling_ = inf;
+
+  if (num_fractional != 0 && settings_.max_cut_passes > 0) {
+    settings_.log.printf(
+      " | Explored | Unexplored |    Objective    |     Bound     | IntInf | Depth | Iter/Node |   "
+      "Gap    "
+      "|  Time  |\n");
+  }
+
+  cut_pool_t<i_t, f_t> cut_pool(original_lp_.num_cols, settings_);
+  cut_generation_t<i_t, f_t> cut_generation(
+    cut_pool, original_lp_, settings_, Arow_, new_slacks_, var_types_);
+
+  std::vector<f_t> saved_solution;
+#ifdef CHECK_CUTS_AGAINST_SAVED_SOLUTION
+  read_saved_solution_for_cut_verification(original_lp_, settings_, saved_solution);
+#endif
+
+  f_t last_upper_bound     = std::numeric_limits<f_t>::infinity();
+  f_t last_objective       = root_objective_;
+  f_t root_relax_objective = root_objective_;
+
+  i_t cut_pool_size = 0;
+  for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
+    if (num_fractional == 0) {
+      set_solution_at_root(solution, cut_info);
+      return mip_status_t::OPTIMAL;
+    } else {
+#ifdef PRINT_FRACTIONAL_INFO
+      settings_.log.printf(
+        "Found %d fractional variables on cut pass %d\n", num_fractional, cut_pass);
+      for (i_t j : fractional) {
+        settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n",
+                             j,
+                             original_lp_.lower[j],
+                             root_relax_soln_.x[j],
+                             original_lp_.upper[j]);
+      }
+#endif
+
+      // Generate cuts and add them to the cut pool
+      f_t cut_start_time = tic();
+      cut_generation.generate_cuts(original_lp_,
+                                   settings_,
+                                   Arow_,
+                                   new_slacks_,
+                                   var_types_,
+                                   basis_update,
+                                   root_relax_soln_.x,
+                                   basic_list,
+                                   nonbasic_list);
+      f_t cut_generation_time = toc(cut_start_time);
+      if (cut_generation_time > 1.0) {
+        settings_.log.debug("Cut generation time %.2f seconds\n", cut_generation_time);
+      }
+      // Score the cuts
+      f_t score_start_time = tic();
+      cut_pool.score_cuts(root_relax_soln_.x);
+      f_t score_time = toc(score_start_time);
+      if (score_time > 1.0) { settings_.log.debug("Cut scoring time %.2f seconds\n", score_time); }
+      // Get the best cuts from the cut pool
+      csr_matrix_t<i_t, f_t> cuts_to_add(0, original_lp_.num_cols, 0);
+      std::vector<f_t> cut_rhs;
+      std::vector<cut_type_t> cut_types;
+      i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types);
+      if (num_cuts == 0) { break; }
+      cut_info.record_cut_types(cut_types);
+#ifdef PRINT_CUT_POOL_TYPES
+      cut_pool.print_cutpool_types();
+      print_cut_types("In LP      ", cut_types, settings_);
+      printf("Cut pool size: %d\n", cut_pool.pool_size());
+#endif
+
+#ifdef CHECK_CUT_MATRIX
+      if (cuts_to_add.check_matrix() != 0) {
+        settings_.log.printf("Bad cuts matrix\n");
+        for (i_t i = 0; i < static_cast<i_t>(cut_types.size()); ++i) {
+          settings_.log.printf("row %d cut type %d\n", i, cut_types[i]);
+        }
+        return mip_status_t::NUMERICAL;
+      }
+#endif
+      // Check against saved solution
+#ifdef CHECK_CUTS_AGAINST_SAVED_SOLUTION
+      verify_cuts_against_saved_solution(cuts_to_add, cut_rhs, saved_solution);
+#endif
+      cut_pool_size = cut_pool.pool_size();
+
+      // Resolve the LP with the new cuts
+      settings_.log.debug(
+        "Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
+        num_cuts,
+        cuts_to_add.row_start[cuts_to_add.m],
+        cut_pool.pool_size(),
+        cuts_to_add.m + original_lp_.num_rows);
+      lp_settings.log.log = false;
+
+      f_t add_cuts_start_time = tic();
+      mutex_original_lp_.lock();
+      i_t add_cuts_status = add_cuts(settings_,
+                                     cuts_to_add,
+                                     cut_rhs,
+                                     original_lp_,
+                                     new_slacks_,
+                                     root_relax_soln_,
+                                     basis_update,
+                                     basic_list,
+                                     nonbasic_list,
+                                     root_vstatus_,
+                                     edge_norms_);
+      var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS);
+      mutex_original_lp_.unlock();
+      f_t add_cuts_time = toc(add_cuts_start_time);
+      if (add_cuts_time > 1.0) {
+        settings_.log.debug("Add cuts time %.2f seconds\n", add_cuts_time);
+      }
+      if (add_cuts_status != 0) {
+        settings_.log.printf("Failed to add cuts\n");
+        return mip_status_t::NUMERICAL;
+      }
+
+      if (settings_.reduced_cost_strengthening >= 1 && upper_bound_.load() < last_upper_bound) {
+        mutex_upper_.lock();
+        last_upper_bound = upper_bound_.load();
+        std::vector<f_t> lower_bounds;
+        std::vector<f_t> upper_bounds;
+        find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds);
+        mutex_upper_.unlock();
+        mutex_original_lp_.lock();
+        original_lp_.lower = lower_bounds;
+        original_lp_.upper = upper_bounds;
+        mutex_original_lp_.unlock();
+      }
+
+      // Try to do bound strengthening
+      std::vector<bool> bounds_changed(original_lp_.num_cols, true);
+      std::vector<char> row_sense;
+#ifdef CHECK_MATRICES
+      settings_.log.printf("Before A check\n");
+      original_lp_.A.check_matrix();
+#endif
+      original_lp_.A.to_compressed_row(Arow_);
+
+      f_t node_presolve_start_time = tic();
+      bounds_strengthening_t<i_t, f_t> node_presolve(original_lp_, Arow_, row_sense, var_types_);
+      std::vector<f_t> new_lower = original_lp_.lower;
+      std::vector<f_t> new_upper = original_lp_.upper;
+      bool feasible =
+        node_presolve.bounds_strengthening(settings_, bounds_changed, new_lower, new_upper);
+      mutex_original_lp_.lock();
+      original_lp_.lower = new_lower;
+      original_lp_.upper = new_upper;
+      mutex_original_lp_.unlock();
+      f_t node_presolve_time = toc(node_presolve_start_time);
+      if (node_presolve_time > 1.0) {
+        settings_.log.debug("Node presolve time %.2f seconds\n", node_presolve_time);
+      }
+      if (!feasible) {
+        settings_.log.printf("Bound strengthening detected infeasibility\n");
+        return mip_status_t::INFEASIBLE;
+      }
+
+      i_t iter                    = 0;
+      bool initialize_basis       = false;
+      lp_settings.concurrent_halt = NULL;
+      f_t dual_phase2_start_time  = tic();
+      dual::status_t cut_status   = dual_phase2_with_advanced_basis(2,
+                                                                  0,
+                                                                  initialize_basis,
+                                                                  exploration_stats_.start_time,
+                                                                  original_lp_,
+                                                                  lp_settings,
+                                                                  root_vstatus_,
+                                                                  basis_update,
+                                                                  basic_list,
+                                                                  nonbasic_list,
+                                                                  root_relax_soln_,
+                                                                  iter,
+                                                                  edge_norms_);
+      f_t dual_phase2_time        = toc(dual_phase2_start_time);
+      if (dual_phase2_time > 1.0) {
+        settings_.log.debug("Dual phase2 time %.2f seconds\n", dual_phase2_time);
+      }
+      if (cut_status == dual::status_t::TIME_LIMIT) {
+        solver_status_ = mip_status_t::TIME_LIMIT;
+        set_final_solution(solution, root_objective_);
+        return solver_status_;
+      }
+
+      if (cut_status != dual::status_t::OPTIMAL) {
+        settings_.log.printf("Cut status %s\n", dual::status_to_string(cut_status).c_str());
+        return mip_status_t::NUMERICAL;
+      }
+      exploration_stats_.total_lp_iters += root_relax_soln_.iterations;
+      root_objective_ = compute_objective(original_lp_, root_relax_soln_.x);
+
+      local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_);
+
+      f_t remove_cuts_start_time = tic();
+      mutex_original_lp_.lock();
+      remove_cuts(original_lp_,
+                  settings_,
+                  Arow_,
+                  new_slacks_,
+                  original_rows,
+                  var_types_,
+                  root_vstatus_,
+                  edge_norms_,
+                  root_relax_soln_.x,
+                  root_relax_soln_.y,
+                  root_relax_soln_.z,
+                  basic_list,
+                  nonbasic_list,
+                  basis_update);
+      mutex_original_lp_.unlock();
+      f_t remove_cuts_time = toc(remove_cuts_start_time);
+      if (remove_cuts_time > 1.0) {
+        settings_.log.debug("Remove cuts time %.2f seconds\n", remove_cuts_time);
+      }
+      fractional.clear();
+      num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
+
+      if (num_fractional == 0) {
+        upper_bound_ = root_objective_;
+        mutex_upper_.lock();
+        incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x);
+        mutex_upper_.unlock();
+      }
+      f_t obj = upper_bound_.load();
+      report(' ', obj, root_objective_, 0, num_fractional);
+
+      f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), root_objective_);
+      f_t abs_gap = upper_bound_.load() - root_objective_;
+      if (rel_gap < settings_.relative_mip_gap_tol || abs_gap < settings_.absolute_mip_gap_tol) {
+        set_solution_at_root(solution, cut_info);
+        set_final_solution(solution, root_objective_);
+        return mip_status_t::OPTIMAL;
+      }
+
+      f_t change_in_objective = root_objective_ - last_objective;
+      const f_t factor        = settings_.cut_change_threshold;
+      const f_t min_objective = 1e-3;
+      if (change_in_objective <= factor * std::max(min_objective, std::abs(root_relax_objective))) {
+        settings_.log.debug(
+          "Change in objective %.16e is less than 1e-3 of root relax objective %.16e\n",
+          change_in_objective,
+          root_relax_objective);
+        break;
+      }
+      last_objective = root_objective_;
     }
-    return mip_status_t::OPTIMAL;
   }
 
+  print_cut_info(settings_, cut_info);
+
+  if (cut_info.has_cuts()) {
+    settings_.log.printf("Cut pool size  : %d\n", cut_pool_size);
+    settings_.log.printf("Size with cuts : %d constraints, %d variables, %d nonzeros\n",
+                         original_lp_.num_rows,
+                         original_lp_.num_cols,
+                         original_lp_.A.col_start[original_lp_.A.n]);
+  }
+
+  set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_);
+
   pc_.resize(original_lp_.num_cols);
   strong_branching<i_t, f_t>(original_problem_,
                              original_lp_,
@@ -1566,6 +2096,50 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
     return solver_status_;
   }
 
+  if (settings_.reduced_cost_strengthening >= 2 && upper_bound_.load() < last_upper_bound) {
+    std::vector<f_t> lower_bounds;
+    std::vector<f_t> upper_bounds;
+    i_t num_fixed = find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds);
+    if (num_fixed > 0) {
+      std::vector<bool> bounds_changed(original_lp_.num_cols, true);
+      std::vector<char> row_sense;
+
+      bounds_strengthening_t<i_t, f_t> node_presolve(original_lp_, Arow_, row_sense, var_types_);
+
+      mutex_original_lp_.lock();
+      original_lp_.lower = lower_bounds;
+      original_lp_.upper = upper_bounds;
+      bool feasible      = node_presolve.bounds_strengthening(
+        settings_, bounds_changed, original_lp_.lower, original_lp_.upper);
+      mutex_original_lp_.unlock();
+      if (!feasible) {
+        settings_.log.printf("Bound strengthening failed\n");
+        return mip_status_t::NUMERICAL;  // We had a feasible integer solution, but bound
+                                         // strengthening thinks we are infeasible.
+      }
+      // Go through and check the fractional variables and remove any that are now fixed to their
+      // bounds
+      std::vector<i_t> to_remove(fractional.size(), 0);
+      i_t num_to_remove = 0;
+      for (i_t k = 0; k < fractional.size(); k++) {
+        const i_t j = fractional[k];
+        if (std::abs(original_lp_.upper[j] - original_lp_.lower[j]) < settings_.fixed_tol) {
+          to_remove[k] = 1;
+          num_to_remove++;
+        }
+      }
+      if (num_to_remove > 0) {
+        std::vector<i_t> new_fractional;
+        new_fractional.reserve(fractional.size() - num_to_remove);
+        for (i_t k = 0; k < fractional.size(); k++) {
+          if (!to_remove[k]) { new_fractional.push_back(fractional[k]); }
+        }
+        fractional     = new_fractional;
+        num_fractional = fractional.size();
+      }
+    }
+  }
+
   // Choose variable to branch on
   i_t branch_var = pc_.variable_selection(fractional, root_relax_soln_.x, log);
 
@@ -1575,6 +2149,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   search_tree_.branch(&search_tree_.root,
                       branch_var,
                       root_relax_soln_.x[branch_var],
+                      num_fractional,
                       root_vstatus_,
                       original_lp_,
                       log);
@@ -1589,14 +2164,13 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   exploration_stats_.nodes_since_last_log = 0;
   exploration_stats_.last_log             = tic();
   active_subtrees_                        = 0;
-  is_running                              = true;
   lower_bound_ceiling_                    = inf;
   should_report_                          = true;
 
   settings_.log.printf(
-    "  | Explored | Unexplored |    Objective    |     Bound     | Depth | Iter/Node |   Gap    "
+    " | Explored | Unexplored |    Objective    |     Bound     | IntInf | Depth | Iter/Node |   "
+    "Gap    "
     "|  Time  |\n");
-
 #pragma omp parallel num_threads(settings_.num_threads)
   {
 #pragma omp master
diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp
index 19621b8896..6db45e1531 100644
--- a/cpp/src/dual_simplex/branch_and_bound.hpp
+++ b/cpp/src/dual_simplex/branch_and_bound.hpp
@@ -7,6 +7,7 @@
 
 #pragma once
 
+#include <dual_simplex/cuts.hpp>
 #include <dual_simplex/diving_heuristics.hpp>
 #include <dual_simplex/initial_basis.hpp>
 #include <dual_simplex/mip_node.hpp>
@@ -72,7 +73,8 @@ template <typename i_t, typename f_t>
 class branch_and_bound_t {
  public:
   branch_and_bound_t(const user_problem_t<i_t, f_t>& user_problem,
-                     const simplex_solver_settings_t<i_t, f_t>& solver_settings);
+                     const simplex_solver_settings_t<i_t, f_t>& solver_settings,
+                     f_t start_time);
 
   // Set an initial guess based on the user_problem. This should be called before solve.
   void set_initial_guess(const std::vector<f_t>& user_guess) { guess_ = user_guess; }
@@ -117,7 +119,17 @@ class branch_and_bound_t {
   bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; }
   std::atomic<int>* get_root_concurrent_halt() { return &root_concurrent_halt_; }
   void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; }
-  lp_status_t solve_root_relaxation(simplex_solver_settings_t<i_t, f_t> const& lp_settings);
+  lp_status_t solve_root_relaxation(simplex_solver_settings_t<i_t, f_t> const& lp_settings,
+                                    lp_solution_t<i_t, f_t>& root_relax_soln,
+                                    std::vector<variable_status_t>& root_vstatus,
+                                    basis_update_mpf_t<i_t, f_t>& basis_update,
+                                    std::vector<i_t>& basic_list,
+                                    std::vector<i_t>& nonbasic_list,
+                                    std::vector<f_t>& edge_norms);
+
+  i_t find_reduced_cost_fixings(f_t upper_bound,
+                                std::vector<f_t>& lower_bounds,
+                                std::vector<f_t>& upper_bounds);
 
   // The main entry routine. Returns the solver status and populates solution with the incumbent.
   mip_status_t solve(mip_solution_t<i_t, f_t>& solution);
@@ -144,6 +156,13 @@ class branch_and_bound_t {
   // Local lower bounds for each thread
   std::vector<omp_atomic_t<f_t>> local_lower_bounds_;
 
+  // Mutex for the original LP
+  // The heuristic threads read the original LP, while the main thread modifies the
+  // size of the original LP by adding slacks for cuts. Heuristic threads should lock
+  // this mutex when accessing the original LP. The main thread should lock this mutex
+  // when modifying the original LP.
+  omp_mutex_t mutex_original_lp_;
+
   // Mutex for upper bound
   omp_mutex_t mutex_upper_;
 
@@ -196,7 +215,11 @@ class branch_and_bound_t {
   std::function<void(f_t)> user_bound_callback_;
 
   void report_heuristic(f_t obj);
-  void report(char symbol, f_t obj, f_t lower_bound, i_t node_depth);
+  void report(char symbol, f_t obj, f_t lower_bound, i_t node_depth, i_t node_int_infeas);
+
+  // Set the solution when found at the root node
+  void set_solution_at_root(mip_solution_t<i_t, f_t>& solution,
+                            const cut_info_t<i_t, f_t>& cut_info);
   void update_user_bound(f_t lower_bound);
 
   // Set the final solution.
@@ -212,6 +235,9 @@ class branch_and_bound_t {
   // Repairs low-quality solutions from the heuristics, if it is applicable.
   void repair_heuristic_solutions();
 
+  // Initialize diving heuristics settings
+  void initialize_diving_heuristics_settings(std::vector<bnb_worker_type_t>& diving_strategies);
+
   // Ramp-up phase of the solver, where we greedily expand the tree until
   // there is enough unexplored nodes. This is done recursively using OpenMP tasks.
   void exploration_ramp_up(mip_node_t<i_t, f_t>* node, i_t initial_heap_size);
@@ -251,6 +277,7 @@ class branch_and_bound_t {
   dual::status_t solve_node_lp(mip_node_t<i_t, f_t>* node_ptr,
                                lp_problem_t<i_t, f_t>& leaf_problem,
                                lp_solution_t<i_t, f_t>& leaf_solution,
+                               std::vector<f_t>& leaf_edge_norms,
                                basis_update_mpf_t<i_t, f_t>& basis_factors,
                                std::vector<i_t>& basic_list,
                                std::vector<i_t>& nonbasic_list,
@@ -269,6 +296,7 @@ class branch_and_bound_t {
                                                              search_tree_t<i_t, f_t>& search_tree,
                                                              lp_problem_t<i_t, f_t>& leaf_problem,
                                                              lp_solution_t<i_t, f_t>& leaf_solution,
+                                                             std::vector<f_t>& leaf_edge_norms,
                                                              bnb_worker_type_t thread_type,
                                                              dual::status_t lp_status,
                                                              logger_t& log);
diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp
index 8ee3fb0ce4..81d5ec1e6d 100644
--- a/cpp/src/dual_simplex/crossover.cpp
+++ b/cpp/src/dual_simplex/crossover.cpp
@@ -25,7 +25,7 @@ crossover_status_t return_to_status(int status)
 {
   if (status == -1) {
     return crossover_status_t::TIME_LIMIT;
-  } else if (status == -2) {
+  } else if (status == CONCURRENT_HALT_RETURN) {
     return crossover_status_t::CONCURRENT_LIMIT;
   } else {
     return crossover_status_t::NUMERICAL_ISSUES;
@@ -502,7 +502,32 @@ i_t dual_push(const lp_problem_t<i_t, f_t>& lp,
         std::vector<i_t> q(m);
         std::vector<i_t> deficient;
         std::vector<i_t> slacks_needed;
-        factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
+        i_t rank =
+          factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
+        if (rank == CONCURRENT_HALT_RETURN) {
+          return CONCURRENT_HALT_RETURN;
+        } else if (rank != m) {
+          settings.log.printf("Failed to factorize basis. rank %d m %d\n", rank, m);
+          basis_repair(lp.A,
+                       settings,
+                       lp.lower,
+                       lp.upper,
+                       deficient,
+                       slacks_needed,
+                       basic_list,
+                       nonbasic_list,
+                       vstatus);
+          rank =
+            factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
+          if (rank == CONCURRENT_HALT_RETURN) {
+            return CONCURRENT_HALT_RETURN;
+          } else if (rank == -1) {
+            settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
+            return -1;
+          } else {
+            settings.log.printf("Basis repaired\n");
+          }
+        }
         reorder_basic_list(q, basic_list);
         // Reordering the basic list causes us to mess up the superbasic list index
         // so we need to update it
@@ -535,7 +560,7 @@ i_t dual_push(const lp_problem_t<i_t, f_t>& lp,
     }
     if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
       settings.log.printf("Concurrent halt\n");
-      return -2;
+      return CONCURRENT_HALT_RETURN;
     }
   }
 
@@ -783,7 +808,9 @@ i_t primal_push(const lp_problem_t<i_t, f_t>& lp,
         std::vector<i_t> slacks_needed;
         i_t rank =
           factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
-        if (rank != m) {
+        if (rank == CONCURRENT_HALT_RETURN) {
+          return CONCURRENT_HALT_RETURN;
+        } else if (rank != m) {
           settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
           basis_repair(lp.A,
                        settings,
@@ -794,8 +821,11 @@ i_t primal_push(const lp_problem_t<i_t, f_t>& lp,
                        basic_list,
                        nonbasic_list,
                        vstatus);
-          if (factorize_basis(
-                lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) {
+          rank =
+            factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
+          if (rank == CONCURRENT_HALT_RETURN) {
+            return CONCURRENT_HALT_RETURN;
+          } else if (rank == -1) {
             settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
             return -1;
           } else {
@@ -833,7 +863,7 @@ i_t primal_push(const lp_problem_t<i_t, f_t>& lp,
     }
     if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
       settings.log.printf("Concurrent halt\n");
-      return -2;
+      return CONCURRENT_HALT_RETURN;
     }
   }
 
@@ -1137,6 +1167,7 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
   std::vector<i_t> slacks_needed;
 
   rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
+  if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; }
   if (rank != m) {
     settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
     basis_repair(lp.A,
@@ -1148,8 +1179,10 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
                  basic_list,
                  nonbasic_list,
                  vstatus);
-    if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) ==
-        -1) {
+    rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
+    if (rank == CONCURRENT_HALT_RETURN) {
+      return crossover_status_t::CONCURRENT_LIMIT;
+    } else if (rank == -1) {
       settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
       return crossover_status_t::NUMERICAL_ISSUES;
     } else {
@@ -1218,8 +1251,8 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
     i_t primal_push_status  = primal_push(
       lp, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus);
     if (primal_push_status < 0) { return return_to_status(primal_push_status); }
-    print_crossover_info(lp, settings, vstatus, solution, "Primal push complete");
     compute_dual_solution_from_basis(lp, ft, basic_list, nonbasic_list, solution.y, solution.z);
+    print_crossover_info(lp, settings, vstatus, solution, "Primal push complete");
   } else {
     settings.log.printf("No primal push needed. No superbasic variables\n");
   }
@@ -1336,7 +1369,9 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
       get_basis_from_vstatus(m, vstatus, basic_list, nonbasic_list, superbasic_list);
       rank =
         factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
-      if (rank != m) {
+      if (rank == CONCURRENT_HALT_RETURN) {
+        return crossover_status_t::CONCURRENT_LIMIT;
+      } else if (rank != m) {
         settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
         basis_repair(lp.A,
                      settings,
@@ -1347,8 +1382,11 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
                      basic_list,
                      nonbasic_list,
                      vstatus);
-        if (factorize_basis(
-              lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) {
+        rank =
+          factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
+        if (rank == CONCURRENT_HALT_RETURN) {
+          return crossover_status_t::CONCURRENT_LIMIT;
+        } else if (rank == -1) {
           settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
           return crossover_status_t::NUMERICAL_ISSUES;
         } else {
@@ -1358,8 +1396,7 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
       reorder_basic_list(q, basic_list);
       ft.reset(L, U, p);
 
-      compute_dual_solution_from_basis(lp, ft, basic_list, nonbasic_list, solution.y, solution.z);
-
+      solution      = phase1_solution;
       i_t num_flips = 0;
       for (i_t j = 0; j < n; ++j) {
         if (vstatus[j] == variable_status_t::BASIC) { continue; }
@@ -1376,20 +1413,23 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
         }
       }
       settings.log.debug("Num flips %d\n", num_flips);
-      solution = phase1_solution;
       print_crossover_info(lp, settings, vstatus, solution, "Dual phase 1 complete");
-      std::vector<f_t> edge_norms;
-      dual::status_t status = dual_phase2(
-        2, iter == 0 ? 1 : 0, start_time, lp, settings, vstatus, solution, iter, edge_norms);
-      if (toc(start_time) > settings.time_limit) {
-        settings.log.printf("Time limit exceeded\n");
-        return crossover_status_t::TIME_LIMIT;
-      }
-      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
-        settings.log.printf("Concurrent halt\n");
-        return crossover_status_t::CONCURRENT_LIMIT;
+      dual_infeas           = dual_infeasibility(lp, settings, vstatus, solution.z);
+      dual::status_t status = dual::status_t::NUMERICAL;
+      if (dual_infeas <= settings.dual_tol) {
+        std::vector<f_t> edge_norms;
+        status = dual_phase2(
+          2, iter == 0 ? 1 : 0, start_time, lp, settings, vstatus, solution, iter, edge_norms);
+        if (toc(start_time) > settings.time_limit) {
+          settings.log.printf("Time limit exceeded\n");
+          return crossover_status_t::TIME_LIMIT;
+        }
+        if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
+          settings.log.printf("Concurrent halt\n");
+          return crossover_status_t::CONCURRENT_LIMIT;
+        }
+        solution.iterations += iter;
       }
-      solution.iterations += iter;
       primal_infeas = primal_infeasibility(lp, settings, vstatus, solution.x);
       dual_infeas   = dual_infeasibility(lp, settings, vstatus, solution.z);
       primal_res    = primal_residual(lp, solution);
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
new file mode 100644
index 0000000000..be3f3001d8
--- /dev/null
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -0,0 +1,2822 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include <dual_simplex/basis_solves.hpp>
+#include <dual_simplex/cuts.hpp>
+#include <dual_simplex/dense_matrix.hpp>
+#include <dual_simplex/tic_toc.hpp>
+
+namespace cuopt::linear_programming::dual_simplex {
+
+// Append a cut (age 0) to the pool unless it uses a non-original column or squeezes to nothing.
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::add_cut(cut_type_t cut_type,
+                                   const sparse_vector_t<i_t, f_t>& cut,
+                                   f_t rhs)
+{
+  // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool
+
+  for (const i_t col : cut.i) {
+    if (col >= original_vars_) {
+      settings_.log.printf(
+        "Cut has variable %d that is greater than original_vars_ %d\n", col, original_vars_);
+      return;
+    }
+  }
+
+  sparse_vector_t<i_t, f_t> compact;
+  cut.squeeze(compact);
+  if (compact.i.empty()) {
+    settings_.log.printf("Cut has no coefficients\n");
+    return;
+  }
+  cut_storage_.append_row(compact);
+  rhs_storage_.push_back(rhs);
+  cut_type_.push_back(cut_type);
+  cut_age_.push_back(0);
+}
+
+// Returns (rhs - a.x) / ||a||_2 for pool row `row`, where a is the stored cut row.
+// Also writes the numerator to cut_violation and the Euclidean norm of a to cut_norm.
+template <typename i_t, typename f_t>
+f_t cut_pool_t<i_t, f_t>::cut_distance(i_t row,
+                                       const std::vector<f_t>& x,
+                                       f_t& cut_violation,
+                                       f_t& cut_norm)
+{
+  f_t activity     = 0.0;
+  f_t squared_norm = 0.0;
+  const i_t last   = cut_storage_.row_start[row + 1];
+  for (i_t k = cut_storage_.row_start[row]; k < last; ++k) {
+    const f_t coeff = cut_storage_.x[k];
+    activity += coeff * x[cut_storage_.j[k]];
+    squared_norm += coeff * coeff;
+  }
+  // Positive when a.x falls short of the right-hand side
+  cut_violation = rhs_storage_[row] - activity;
+  cut_norm      = std::sqrt(squared_norm);
+  return cut_violation / cut_norm;
+}
+
+// Number of stored entries in pool row `row` divided by the number of original variables.
+template <typename i_t, typename f_t>
+f_t cut_pool_t<i_t, f_t>::cut_density(i_t row)
+{
+  const i_t nnz = cut_storage_.row_start[row + 1] - cut_storage_.row_start[row];
+  // Both operands are converted to f_t so the quotient is a floating-point ratio
+  const f_t num_vars = static_cast<f_t>(original_vars_);
+  return static_cast<f_t>(nnz) / num_vars;
+}
+
+// Orthogonality of pool rows i and j: 1 - |a_i . a_j| / (||a_i|| * ||a_j||).
+// The norms are read from cut_norms_, so that cache must already hold valid entries
+// for both rows (score_cuts fills it through cut_distance).
+template <typename i_t, typename f_t>
+f_t cut_pool_t<i_t, f_t>::cut_orthogonality(i_t i, i_t j)
+{
+  const i_t first_i = cut_storage_.row_start[i];
+  const i_t first_j = cut_storage_.row_start[j];
+  const i_t len_i   = cut_storage_.row_start[i + 1] - first_i;
+  const i_t len_j   = cut_storage_.row_start[j + 1] - first_j;
+
+  // Inner product of the two stored rows
+  auto cols = cut_storage_.j.data();
+  auto vals = cut_storage_.x.data();
+  const f_t inner =
+    sparse_dot(cols + first_i, vals + first_i, len_i, cols + first_j, vals + first_j, len_j);
+
+  // Normalize by the product of the cached row norms
+  const f_t norm_product = cut_norms_[i] * cut_norms_[j];
+  return 1.0 - std::abs(inner) / norm_product;
+}
+
+// Score every pooled cut against x_relax and select up to max_cuts of them into best_cuts_.
+// Candidates are taken from the back of the permutation produced by best_score_last_permutation
+// (presumably best score last); a cut is kept only if it is violated and sufficiently orthogonal.
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
+{
+  const f_t min_cut_distance = 1e-4;
+  cut_distances_.resize(cut_storage_.m, 0.0);
+  cut_norms_.resize(cut_storage_.m, 0.0);
+
+  const bool verbose = false;
+  for (i_t i = 0; i < cut_storage_.m; i++) {
+    f_t violation;
+    f_t cut_dist      = cut_distance(i, x_relax, violation, cut_norms_[i]);
+    cut_distances_[i] = cut_dist <= min_cut_distance ? 0.0 : cut_dist;
+    if (verbose) {
+      settings_.log.printf("Cut %d type %d distance %+e violation %+e cut_norm %e\n",
+                           i,
+                           static_cast<int>(cut_type_[i]),
+                           cut_distances_[i],
+                           violation,
+                           cut_norms_[i]);
+    }
+  }
+
+  std::vector<i_t> sorted_indices;
+  best_score_last_permutation(cut_distances_, sorted_indices);
+
+  const i_t max_cuts          = 2000;
+  const f_t min_orthogonality = settings_.cut_min_orthogonality;
+  best_cuts_.reserve(std::min(max_cuts, cut_storage_.m));
+  best_cuts_.clear();
+  scored_cuts_ = 0;
+
+  // Seed the selection with the top cut, but only when it actually separates x_relax.
+  // Distances at or below min_cut_distance were clamped to 0.0 above, so they fail this test.
+  if (!sorted_indices.empty() && cut_distances_[sorted_indices.back()] > min_cut_distance) {
+    best_cuts_.push_back(sorted_indices.back());
+    sorted_indices.pop_back();
+    scored_cuts_++;
+  }
+
+  while (scored_cuts_ < max_cuts && !sorted_indices.empty()) {
+    const i_t i = sorted_indices.back();
+    sorted_indices.pop_back();
+    if (cut_distances_[i] <= min_cut_distance) { break; }
+    f_t cut_ortho = 1.0;
+    for (const i_t j : best_cuts_) {
+      cut_ortho = std::min(cut_ortho, cut_orthogonality(i, j));
+    }
+    if (cut_ortho >= min_orthogonality) {
+      best_cuts_.push_back(i);
+      scored_cuts_++;
+    }
+  }
+}
+
+// Copy the rows listed in best_cuts_ into the output containers with the row and its rhs
+// negated, then age every cut in the pool. Returns the number of cuts copied.
+template <typename i_t, typename f_t>
+i_t cut_pool_t<i_t, f_t>::get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts,
+                                        std::vector<f_t>& best_rhs,
+                                        std::vector<cut_type_t>& best_cut_types)
+{
+  best_cuts.row_start.clear();
+  best_cuts.j.clear();
+  best_cuts.x.clear();
+  best_rhs.clear();
+  best_cut_types.clear();
+  best_cuts.m = 0;
+  best_cuts.n = original_vars_;
+  best_cuts.row_start.reserve(scored_cuts_ + 1);
+  best_cuts.row_start.push_back(0);
+  best_rhs.reserve(scored_cuts_);
+  best_cut_types.reserve(scored_cuts_);
+
+  for (const i_t pool_row : best_cuts_) {
+    sparse_vector_t<i_t, f_t> flipped(cut_storage_, pool_row);
+    flipped.negate();
+    best_cuts.append_row(flipped);
+    best_rhs.push_back(-rhs_storage_[pool_row]);
+    best_cut_types.push_back(cut_type_[pool_row]);
+  }
+  age_cuts();
+  return static_cast<i_t>(best_cuts_.size());
+}
+
+// Advance the age counter of every cut in the pool by one.
+// Invoked by get_best_cuts after the selected cuts have been copied out.
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::age_cuts()
+{
+  for (auto& age : cut_age_) { ++age; }
+}
+
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::drop_cuts()
+{
+  // TODO: Implement this. Currently a no-op: the body is empty, so no cut leaves the pool here.
+}
+
+// Identify the rows of Arow that can be treated as 0-1 knapsack constraints. A row qualifies
+// when it has at least 3 entries, every non-slack entry has a nonnegative integral coefficient
+// on an INTEGER variable with bounds [0, 1], and its rhs passes the checks applied below.
+template <typename i_t, typename f_t>
+knapsack_generation_t<i_t, f_t>::knapsack_generation_t(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types)
+  : settings_(settings)
+{
+  const bool verbose = false;
+  knapsack_constraints_.reserve(lp.num_rows);
+
+  // Flag the slack columns so that they can be skipped while scanning rows
+  is_slack_.resize(lp.num_cols, 0);
+  for (const i_t slack : new_slacks) {
+    is_slack_[slack] = 1;
+  }
+
+  for (i_t row = 0; row < lp.num_rows; row++) {
+    const i_t first   = Arow.row_start[row];
+    const i_t last    = Arow.row_start[row + 1];
+    const i_t row_len = last - first;
+    if (row_len < 3) { continue; }
+
+    bool is_knapsack = true;
+    f_t sum_pos      = 0.0;
+    for (i_t p = first; p < last; p++) {
+      const i_t col = Arow.j[p];
+      if (is_slack_[col]) { continue; }
+      const f_t coeff = Arow.x[p];
+      // Disqualify on a fractional coefficient, a non-binary variable, or a negative coefficient
+      if (std::abs(coeff - std::round(coeff)) > settings.integer_tol ||
+          var_types[col] != variable_type_t::INTEGER || lp.lower[col] != 0.0 ||
+          lp.upper[col] != 1.0 || coeff < 0.0) {
+        is_knapsack = false;
+        break;
+      }
+      sum_pos += coeff;
+    }
+    if (!is_knapsack) { continue; }
+
+    // rhs must be integral, in (0, sum_pos], and > 1e-3 away from sum_pos / (row_len - 1)
+    const f_t beta          = lp.rhs[row];
+    const f_t mean_coeff    = sum_pos / (row_len - 1);
+    const bool integral_rhs = std::abs(beta - std::round(beta)) <= settings.integer_tol;
+    if (integral_rhs && beta > 0.0 && beta <= sum_pos && std::abs(mean_coeff - beta) > 1e-3) {
+      if (verbose) {
+        settings.log.printf(
+          "Knapsack constraint %d row len %d beta %e sum_pos %e sum_pos / (row_len - 1) %e\n",
+          row,
+          row_len,
+          beta,
+          sum_pos,
+          mean_coeff);
+      }
+      knapsack_constraints_.push_back(row);
+    }
+  }
+
+#ifdef PRINT_KNAPSACK_INFO
+  i_t num_knapsack_constraints = knapsack_constraints_.size();
+  settings.log.printf("Number of knapsack constraints %d\n", num_knapsack_constraints);
+#endif
+}
+
+// Separate a cover cut from one knapsack row: zero out the slack entries, solve a 0-1 knapsack
+// separation problem over the remaining items, and emit -sum_{j in cover} x_j >= 1 - |cover|.
+// Returns 0 and fills cut / cut_rhs when xstar violates the cut by more than 1e-6;
+// returns -1 otherwise (cut and cut_rhs may have been partially written in that case).
+template <typename i_t, typename f_t>
+i_t knapsack_generation_t<i_t, f_t>::generate_knapsack_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types,
+  const std::vector<f_t>& xstar,
+  i_t knapsack_row,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  const bool verbose = false;
+  // Get the row associated with the knapsack constraint
+  sparse_vector_t<i_t, f_t> knapsack_inequality(Arow, knapsack_row);
+  f_t knapsack_rhs = lp.rhs[knapsack_row];
+
+  // Remove the slacks from the inequality
+  f_t seperation_rhs = 0.0;
+  if (verbose) { settings.log.printf(" Knapsack : "); }
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    const i_t j = knapsack_inequality.i[k];
+    if (is_slack_[j]) {
+      knapsack_inequality.x[k] = 0.0;
+    } else {
+      if (verbose) { settings.log.printf(" %g x%d +", knapsack_inequality.x[k], j); }
+      seperation_rhs += knapsack_inequality.x[k];
+    }
+  }
+  if (verbose) { settings.log.printf(" <= %g\n", knapsack_rhs); }
+  seperation_rhs -= (knapsack_rhs + 1);
+
+  if (verbose) {
+    settings.log.printf("\t");
+    for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+      const i_t j = knapsack_inequality.i[k];
+      if (!is_slack_[j]) {
+        if (std::abs(xstar[j]) > 1e-3) { settings.log.printf("x_relax[%d]= %g ", j, xstar[j]); }
+      }
+    }
+    settings.log.printf("\n");
+    settings.log.printf("seperation_rhs %g\n", seperation_rhs);
+  }
+  if (seperation_rhs <= 0.0) { return -1; }
+
+  // Gather the non-slack items. The containers grow to the actual item count instead of
+  // assuming nz - 1, so a row with zero or several slack columns is handled correctly.
+  std::vector<f_t> values;
+  std::vector<f_t> weights;
+  values.reserve(knapsack_inequality.i.size());
+  weights.reserve(knapsack_inequality.i.size());
+  f_t objective_constant = 0.0;
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    const i_t j = knapsack_inequality.i[k];
+    if (!is_slack_[j]) {
+      const f_t vj = std::min(1.0, std::max(0.0, 1.0 - xstar[j]));
+      objective_constant += vj;
+      values.push_back(vj);
+      weights.push_back(knapsack_inequality.x[k]);
+    }
+  }
+  std::vector<f_t> solution(values.size(), 0.0);
+
+  if (verbose) { settings.log.printf("Calling solve_knapsack_problem\n"); }
+  f_t objective = solve_knapsack_problem(values, weights, seperation_rhs, solution);
+  if (std::isnan(objective)) { return -1; }
+  if (verbose) {
+    settings.log.printf("objective %e objective_constant %e\n", objective, objective_constant);
+  }
+  f_t seperation_value = -objective + objective_constant;
+  if (verbose) { settings.log.printf("seperation_value %e\n", seperation_value); }
+  const f_t tol = 1e-6;
+  if (seperation_value >= 1.0 - tol) { return -1; }
+
+  i_t cover_size = 0;
+  for (i_t k = 0; k < solution.size(); k++) {
+    if (solution[k] == 0.0) { cover_size++; }
+  }
+
+  cut.i.clear();
+  cut.x.clear();
+  cut.i.reserve(cover_size);
+  cut.x.reserve(cover_size);
+
+  i_t h = 0;
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    const i_t j = knapsack_inequality.i[k];
+    if (!is_slack_[j]) {
+      if (solution[h] == 0.0) {
+        cut.i.push_back(j);
+        cut.x.push_back(-1.0);
+      }
+      h++;
+    }
+  }
+  cut_rhs = -cover_size + 1;
+  cut.sort();
+
+  // The cut is in the form: - sum_{j in cover} x_j >= -cover_size + 1
+  // Which is equivalent to: sum_{j in cover} x_j <= cover_size - 1
+  // Verify the cut is violated
+  f_t dot       = cut.dot(xstar);
+  f_t violation = dot - cut_rhs;
+  if (verbose) {
+    settings.log.printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation);
+  }
+
+  if (violation >= -tol) { return -1; }
+#ifdef PRINT_KNAPSACK_CUT
+  settings.log.printf("knapsack cut (cover %d): \n", cover_size);
+  for (i_t k = 0; k < cut.i.size(); k++) {
+    settings.log.printf("x%d coeff %g value %g\n", cut.i[k], -cut.x[k], xstar[cut.i[k]]);
+  }
+  settings.log.printf("cut_rhs %g\n", -cut_rhs);
+#endif
+  return 0;
+}
+
+// Greedy heuristic for the 0-1 knapsack problem (maximize value subject to weight <= rhs).
+// Returns the better of a value/weight-ratio packing and the best single item that fits;
+// solution is overwritten with the corresponding 0/1 selection.
+template <typename i_t, typename f_t>
+f_t knapsack_generation_t<i_t, f_t>::greedy_knapsack_problem(const std::vector<f_t>& values,
+                                                             const std::vector<f_t>& weights,
+                                                             f_t rhs,
+                                                             std::vector<f_t>& solution)
+{
+  const i_t n = weights.size();
+  solution.assign(n, 0.0);
+
+  // Value / weight ratio of every item
+  std::vector<f_t> ratios(n);
+  for (i_t k = 0; k < n; k++) {
+    ratios[k] = values[k] / weights[k];
+  }
+
+  // Item indices ordered by decreasing ratio
+  std::vector<i_t> order(n);
+  std::iota(order.begin(), order.end(), 0);
+  std::sort(order.begin(), order.end(), [&](i_t a, i_t b) { return ratios[a] > ratios[b]; });
+
+  // First candidate: walk the items in ratio order and take each one that still fits
+  f_t capacity_left = rhs;
+  f_t packed_value  = 0.0;
+  for (const i_t item : order) {
+    if (weights[item] <= capacity_left) {
+      solution[item] = 1.0;
+      capacity_left -= weights[item];
+      packed_value += values[item];
+    }
+  }
+
+  // Second candidate: the most valuable single item that fits on its own
+  f_t top_value = 0.0;
+  i_t top_item  = -1;
+  for (i_t k = 0; k < n; ++k) {
+    const bool fits = weights[k] <= rhs;
+    if (fits && values[k] > top_value) {
+      top_value = values[k];
+      top_item  = k;
+    }
+  }
+
+  // Keep the packing unless the single item is strictly better
+  if (top_value > packed_value) {
+    solution.assign(n, 0.0);
+    solution[top_item] = 1.0;
+    return top_value;
+  }
+
+  return packed_value;
+}
+
+template <typename i_t, typename f_t>
+f_t knapsack_generation_t<i_t, f_t>::solve_knapsack_problem(const std::vector<f_t>& values,
+                                                            const std::vector<f_t>& weights,
+                                                            f_t rhs,
+                                                            std::vector<f_t>& solution)
+{
+  // Solve the 0-1 knapsack problem
+  //   maximize   sum_{j=0}^{n-1} values[j] * solution[j]
+  //   subject to sum_{j=0}^{n-1} weights[j] * solution[j] <= rhs
+  // values / weights: per-item value and weight (weights are rounded to the nearest integer)
+  // solution: must already hold n entries on the DP path; overwritten with the 0/1 selection
+  // Returns the value of the selection, or NaN when no solution is computed
+
+  // Dynamic programming over scaled integer values, with a greedy fallback (see below)
+
+  i_t n         = weights.size();
+  f_t objective = std::numeric_limits<f_t>::quiet_NaN();
+  if (values.empty()) { return objective; }  // max_element below requires a non-empty range
+  // Compute the maximum value
+  f_t vmax = *std::max_element(values.begin(), values.end());
+
+  // Check if all the values are integers
+  bool all_integers     = true;
+  const f_t integer_tol = 1e-5;
+  for (i_t j = 0; j < n; j++) {
+    if (std::abs(values[j] - std::round(values[j])) > integer_tol) {
+      all_integers = false;
+      break;
+    }
+  }
+
+  const bool verbose = false;
+
+  if (verbose) { settings_.log.printf("all_integers %d\n", all_integers); }
+
+  // Compute the scaling factor and compute the scaled integer values
+  f_t scale = 1.0;
+  std::vector<i_t> scaled_values(n);
+  if (all_integers) {
+    for (i_t j = 0; j < n; j++) {
+      scaled_values[j] = static_cast<i_t>(std::round(values[j]));  // round: 0.999999 must map to 1
+    }
+  } else {
+    const f_t epsilon = 0.1;
+    scale             = epsilon * vmax / static_cast<f_t>(n);
+    if (scale <= 0.0) { return std::numeric_limits<f_t>::quiet_NaN(); }
+    if (verbose) {
+      settings_.log.printf("scale %g epsilon %g vmax %g n %d\n", scale, epsilon, vmax, n);
+    }
+    for (i_t i = 0; i < n; ++i) {
+      scaled_values[i] = static_cast<i_t>(std::floor(values[i] / scale));
+    }
+  }
+
+  i_t sum_value     = std::accumulate(scaled_values.begin(), scaled_values.end(), 0);
+  const i_t INT_INF = std::numeric_limits<i_t>::max() / 2;
+  if (verbose) { settings_.log.printf("sum value %d\n", sum_value); }
+  const i_t max_size = 10000;
+  if (sum_value <= 0 || sum_value >= max_size) {
+    if (verbose) {
+      settings_.log.printf("sum value %d is negative or too large using greedy solution\n",
+                           sum_value);
+    }
+    return greedy_knapsack_problem(values, weights, rhs, solution);
+  }
+
+  // dp(j, v) = minimum weight using first j items to get value v
+  dense_matrix_t<i_t, i_t> dp(n + 1, sum_value + 1, INT_INF);
+  dense_matrix_t<i_t, uint8_t> take(n + 1, sum_value + 1, 0);
+  dp(0, 0) = 0;
+
+  // Dynamic programming
+  for (i_t j = 1; j <= n; ++j) {
+    const i_t value_j  = scaled_values[j - 1];
+    const i_t weight_j = static_cast<i_t>(std::round(weights[j - 1]));  // near-integral weight
+    for (i_t v = 0; v <= sum_value; ++v) {
+      // Do not take item j-1
+      dp(j, v) = dp(j - 1, v);
+      // Take item j-1 if possible
+      if (v >= value_j) {
+        const i_t candidate = dp(j - 1, v - value_j) + weight_j;
+        if (candidate < dp(j, v)) {
+          dp(j, v)   = candidate;
+          take(j, v) = 1;
+        }
+      }
+    }
+  }
+
+  // Find best achievable value within capacity
+  i_t best_value = 0;
+  for (i_t v = 0; v <= sum_value; ++v) {
+    if (dp(n, v) <= rhs) { best_value = v; }
+  }
+
+  // Backtrack to recover solution
+  i_t v = best_value;
+  for (i_t j = n; j >= 1; --j) {
+    if (take(j, v)) {
+      solution[j - 1] = 1.0;
+      v -= scaled_values[j - 1];
+    } else {
+      solution[j - 1] = 0.0;
+    }
+  }
+
+  objective = best_value * scale;
+  return objective;
+}
+
+// Run each enabled cut separator on xstar; log (debug) any family that takes over 1 second.
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_cuts(const lp_problem_t<i_t, f_t>& lp,
+                                               const simplex_solver_settings_t<i_t, f_t>& settings,
+                                               csr_matrix_t<i_t, f_t>& Arow,
+                                               const std::vector<i_t>& new_slacks,
+                                               const std::vector<variable_type_t>& var_types,
+                                               basis_update_mpf_t<i_t, f_t>& basis_update,
+                                               const std::vector<f_t>& xstar,
+                                               const std::vector<i_t>& basic_list,
+                                               const std::vector<i_t>& nonbasic_list)
+{
+  const bool want_cg = settings.strong_chvatal_gomory_cuts != 0;
+  // Gomory and CG cuts
+  if (settings.mixed_integer_gomory_cuts != 0 || want_cg) {
+    const f_t started = tic();
+    generate_gomory_cuts(
+      lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list);
+    const f_t elapsed = toc(started);
+    if (elapsed > 1.0) {
+      settings.log.debug("Gomory and CG cut generation time %.2f seconds\n", elapsed);
+    }
+  }
+  // Knapsack cuts
+  if (settings.knapsack_cuts != 0) {
+    const f_t started = tic();
+    generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar);
+    const f_t elapsed = toc(started);
+    if (elapsed > 1.0) {
+      settings.log.debug("Knapsack cut generation time %.2f seconds\n", elapsed);
+    }
+  }
+  // MIR and CG cuts
+  if (settings.mir_cuts != 0 || want_cg) {
+    const f_t started = tic();
+    generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar);
+    const f_t elapsed = toc(started);
+    if (elapsed > 1.0) {
+      settings.log.debug("MIR and CG cut generation time %.2f seconds\n", elapsed);
+    }
+  }
+}
+
+// Try to separate a cut from every detected knapsack row and add the successful ones to the pool.
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_knapsack_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types,
+  const std::vector<f_t>& xstar)
+{
+  if (knapsack_generation_.num_knapsack_constraints() <= 0) { return; }
+  for (i_t row : knapsack_generation_.get_knapsack_constraints()) {
+    f_t rhs;
+    sparse_vector_t<i_t, f_t> cover_cut(lp.num_cols, 0);
+    const i_t status = knapsack_generation_.generate_knapsack_cuts(
+      lp, settings, Arow, new_slacks, var_types, xstar, row, cover_cut, rhs);
+    if (status == 0) { cut_pool_.add_cut(cut_type_t::KNAPSACK, cover_cut, rhs); }
+  }
+}
+
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_mir_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types,
+  const std::vector<f_t>& xstar)
+{
+  f_t mir_start_time = tic();
+  mixed_integer_rounding_cut_t<i_t, f_t> mir(lp, settings, new_slacks, xstar);
+  strong_cg_cut_t<i_t, f_t> cg(lp, var_types, xstar);
+
+  std::vector<i_t> slack_map(lp.num_rows, -1);
+  for (i_t slack : new_slacks) {
+    const i_t col_start = lp.A.col_start[slack];
+    const i_t col_end   = lp.A.col_start[slack + 1];
+    const i_t col_len   = col_end - col_start;
+    assert(col_len == 1);
+    const i_t i  = lp.A.i[col_start];
+    slack_map[i] = slack;
+  }
+
+  // Compute initial scores for all rows
+  std::vector<f_t> score(lp.num_rows, 0.0);
+  for (i_t i = 0; i < lp.num_rows; i++) {
+    const i_t row_start = Arow.row_start[i];
+    const i_t row_end   = Arow.row_start[i + 1];
+
+    const i_t row_nz          = row_end - row_start;
+    i_t num_integer_in_row    = 0;
+    i_t num_continuous_in_row = 0;
+    for (i_t p = row_start; p < row_end; p++) {
+      const i_t j = Arow.j[p];
+      if (var_types[j] == variable_type_t::INTEGER) {
+        num_integer_in_row++;
+      } else {
+        num_continuous_in_row++;
+      }
+    }
+
+    if (num_integer_in_row == 0) {
+      score[i] = 0.0;
+
+    } else {
+      f_t nz_score = lp.num_cols - row_nz;
+
+      const i_t slack = slack_map[i];
+      assert(slack >= 0);
+      const f_t slack_value = xstar[slack];
+
+      f_t slack_score = -std::log10(1e-16 + std::abs(slack_value));
+
+      const f_t nz_weight      = 1.0;
+      const f_t slack_weight   = 1.0;
+      const f_t integer_weight = 1.0;
+
+      score[i] =
+        nz_weight * nz_score + slack_weight * slack_score + integer_weight * num_integer_in_row;
+    }
+  }
+
+  // Sort the rows by score
+  std::vector<i_t> sorted_indices;
+  best_score_last_permutation(score, sorted_indices);
+
+  // These data structures are used to track the rows that have been aggregated
+  // The invariant is that aggregated_rows is empty and aggregated_mark is all zeros
+  // at the beginning of each iteration of the for loop below
+  std::vector<i_t> aggregated_rows;
+  std::vector<i_t> aggregated_mark(lp.num_rows, 0);
+
+  const i_t max_cuts = std::min(lp.num_rows, 1000);
+  f_t work_estimate  = 0.0;
+  for (i_t h = 0; h < max_cuts; h++) {
+    // Get the row with the highest score
+    const i_t i = sorted_indices.back();
+    sorted_indices.pop_back();
+    const f_t max_score = score[i];
+
+    const i_t row_nz      = Arow.row_start[i + 1] - Arow.row_start[i];
+    const i_t slack       = slack_map[i];
+    const f_t slack_value = xstar[slack];
+
+    if (max_score <= 0.0) { break; }
+    if (work_estimate > 2e9) { break; }
+
+    sparse_vector_t<i_t, f_t> inequality(Arow, i);
+    work_estimate += inequality.i.size();
+    f_t inequality_rhs         = lp.rhs[i];
+    const bool generate_cg_cut = settings.strong_chvatal_gomory_cuts != 0;
+    f_t fractional_part_rhs    = fractional_part(inequality_rhs);
+    if (generate_cg_cut && fractional_part_rhs > 1e-6 && fractional_part_rhs < (1 - 1e-6)) {
+      // Try to generate a CG cut
+      sparse_vector_t<i_t, f_t> cg_inequality = inequality;
+      f_t cg_inequality_rhs                   = inequality_rhs;
+      if (fractional_part(inequality_rhs) < 0.5) {
+        // Multiply by -1 to force the fractional part to be greater than 0.5
+        cg_inequality_rhs *= -1;
+        cg_inequality.negate();
+      }
+      sparse_vector_t<i_t, f_t> cg_cut(lp.num_cols, 0);
+      f_t cg_cut_rhs;
+      i_t cg_status = cg.generate_strong_cg_cut(
+        lp, settings, var_types, cg_inequality, cg_inequality_rhs, xstar, cg_cut, cg_cut_rhs);
+      if (cg_status == 0) { cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); }
+    }
+
+    // Remove the slack from the equality to get an inequality
+    work_estimate += inequality.i.size();
+    i_t negate_inequality = 1;
+    for (i_t k = 0; k < inequality.i.size(); k++) {
+      const i_t j = inequality.i[k];
+      if (j == slack) {
+        if (inequality.x[k] != 1.0) {
+          if (inequality.x[k] == -1.0 && lp.lower[j] >= 0.0) {
+            negate_inequality = 0;
+          } else {
+            settings.log.debug("Bad slack %d in inequality: aj %e lo %e up %e\n",
+                               j,
+                               inequality.x[k],
+                               lp.lower[j],
+                               lp.upper[j]);
+            negate_inequality = -1;
+            break;
+          }
+        }
+        inequality.x[k] = 0.0;
+      }
+    }
+
+    if (negate_inequality == -1) { continue; }
+
+    if (negate_inequality) {
+      // inequaility'*x <= inequality_rhs
+      // But for MIR we need: inequality'*x >= inequality_rhs
+      inequality_rhs *= -1;
+      inequality.negate();
+      work_estimate += inequality.i.size();
+    }
+    // We should now have: inequality'*x >= inequality_rhs
+
+    // Transform the relaxation solution
+    std::vector<f_t> transformed_xstar;
+    mir.relaxation_to_nonnegative(lp, xstar, transformed_xstar);
+    work_estimate += transformed_xstar.size();
+
+    sparse_vector_t<i_t, f_t> cut(lp.num_cols, 0);
+    f_t cut_rhs;
+    bool add_cut             = false;
+    i_t num_aggregated       = 0;
+    const i_t max_aggregated = 6;
+    work_estimate += lp.num_cols;
+
+    while (!add_cut && num_aggregated < max_aggregated) {
+      sparse_vector_t<i_t, f_t> transformed_inequality;
+      inequality.squeeze(transformed_inequality);
+      f_t transformed_rhs = inequality_rhs;
+      work_estimate += transformed_inequality.i.size();
+
+      mir.to_nonnegative(lp, transformed_inequality, transformed_rhs);
+      work_estimate += transformed_inequality.i.size();
+      std::vector<sparse_vector_t<i_t, f_t>> transformed_cuts;
+      std::vector<f_t> transformed_cut_rhs;
+      std::vector<f_t> transformed_violations;
+
+      //  Generate cut for delta = 1
+      {
+        sparse_vector_t<i_t, f_t> cut_1(lp.num_cols, 0);
+        f_t cut_1_rhs;
+        mir.generate_cut_nonnegative(
+          transformed_inequality, transformed_rhs, var_types, cut_1, cut_1_rhs);
+        f_t cut_1_violation = mir.compute_violation(cut_1, cut_1_rhs, transformed_xstar);
+        if (cut_1_violation > 1e-6) {
+          transformed_cuts.push_back(cut_1);
+          transformed_cut_rhs.push_back(cut_1_rhs);
+          transformed_violations.push_back(cut_1_violation);
+        }
+        work_estimate += transformed_inequality.i.size();
+      }
+
+      // Generate a cut for delta = max { |a_j|, j in I}
+      {
+        f_t max_coeff = 0.0;
+        for (i_t k = 0; k < transformed_inequality.i.size(); k++) {
+          const i_t j = transformed_inequality.i[k];
+          if (var_types[j] == variable_type_t::INTEGER) {
+            const f_t abs_aj = std::abs(transformed_inequality.x[k]);
+            if (abs_aj > max_coeff) { max_coeff = abs_aj; }
+          }
+        }
+        work_estimate += transformed_inequality.i.size();
+
+        if (max_coeff > 1e-6 && max_coeff != 1.0) {
+          sparse_vector_t<i_t, f_t> scaled_inequality = transformed_inequality;
+          const i_t nz                                = transformed_inequality.i.size();
+          for (i_t k = 0; k < nz; k++) {
+            scaled_inequality.x[k] /= max_coeff;
+          }
+          const f_t scaled_rhs = transformed_rhs / max_coeff;
+          sparse_vector_t<i_t, f_t> cut_2(lp.num_cols, 0);
+          f_t cut_2_rhs;
+          mir.generate_cut_nonnegative(scaled_inequality, scaled_rhs, var_types, cut_2, cut_2_rhs);
+          f_t cut_2_violation = mir.compute_violation(cut_2, cut_2_rhs, transformed_xstar);
+          if (cut_2_violation > 1e-6) {
+            transformed_cuts.push_back(cut_2);
+            transformed_cut_rhs.push_back(cut_2_rhs);
+            transformed_violations.push_back(cut_2_violation);
+          }
+          work_estimate += 5 * transformed_inequality.i.size();
+        }
+      }
+
+      if (!transformed_violations.empty()) {
+        std::vector<i_t> permuted(transformed_violations.size());
+        std::iota(permuted.begin(), permuted.end(), 0);
+        std::sort(permuted.begin(), permuted.end(), [&](i_t i, i_t j) {
+          return transformed_violations[i] > transformed_violations[j];
+        });
+        work_estimate += transformed_violations.size() * std::log2(transformed_violations.size());
+        // Get the biggest violation
+        const i_t best_index = permuted[0];
+        f_t max_viol         = transformed_violations[best_index];
+        cut                  = transformed_cuts[best_index];
+        cut_rhs              = transformed_cut_rhs[best_index];
+
+        if (max_viol > 1e-6) {
+          // TODO: Divide by 1/2*violation, 1/4*violation, 1/8*violation
+          // Transform back to the original variables
+          mir.to_original(lp, cut, cut_rhs);
+          mir.remove_small_coefficients(lp.lower, lp.upper, cut, cut_rhs);
+          mir.substitute_slacks(lp, Arow, cut, cut_rhs);
+          f_t viol = mir.compute_violation(cut, cut_rhs, xstar);
+          work_estimate += 10 * cut.i.size();
+          add_cut = true;
+        }
+      }
+
+      if (add_cut) {
+        if (settings.mir_cuts != 0) {
+          cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs);
+        }
+        break;
+      } else {
+        // Perform aggregation to try and find a cut
+
+        // Find all the continuous variables in the inequality
+        i_t num_continuous    = 0;
+        f_t max_off_bound     = 0.0;
+        i_t max_off_bound_var = -1;
+        for (i_t p = 0; p < inequality.i.size(); p++) {
+          const i_t j = inequality.i[p];
+          if (var_types[j] == variable_type_t::CONTINUOUS) {
+            num_continuous++;
+
+            const f_t off_lower = lp.lower[j] > -inf ? xstar[j] - lp.lower[j] : std::abs(xstar[j]);
+            const f_t off_upper = lp.upper[j] < inf ? lp.upper[j] - xstar[j] : std::abs(xstar[j]);
+            const f_t off_bound = std::max(off_lower, off_upper);
+            const i_t col_start = lp.A.col_start[j];
+            const i_t col_end   = lp.A.col_start[j + 1];
+            const i_t col_len   = col_end - col_start;
+            if (off_bound > max_off_bound && col_len > 1) {
+              max_off_bound     = off_bound;
+              max_off_bound_var = j;
+            }
+          }
+        }
+        work_estimate += 10 * inequality.i.size();
+
+        if (num_continuous == 0 || max_off_bound < 1e-6) { break; }
+
+        // The variable that is farthest from its bound is used as a pivot
+        if (max_off_bound_var >= 0) {
+          const i_t col_start          = lp.A.col_start[max_off_bound_var];
+          const i_t col_end            = lp.A.col_start[max_off_bound_var + 1];
+          const i_t col_len            = col_end - col_start;
+          const i_t max_potential_rows = 10;
+          if (col_len > 1) {
+            std::vector<i_t> potential_rows;
+            potential_rows.reserve(col_len);
+
+            const f_t threshold = 1e-4;
+            for (i_t q = col_start; q < col_end; q++) {
+              const i_t i   = lp.A.i[q];
+              const f_t val = lp.A.x[q];
+              // Can't use rows that have already been aggregated
+              if (std::abs(val) > threshold && aggregated_mark[i] == 0) {
+                potential_rows.push_back(i);
+              }
+              if (potential_rows.size() >= max_potential_rows) { break; }
+            }
+            work_estimate += 5 * (col_end - col_start);
+
+            if (!potential_rows.empty()) {
+              std::sort(potential_rows.begin(), potential_rows.end(), [&](i_t a, i_t b) {
+                return score[a] > score[b];
+              });
+              work_estimate += 10 * std::log2(10);
+
+              const i_t pivot_row = potential_rows[0];
+
+              sparse_vector_t<i_t, f_t> pivot_row_inequality(Arow, pivot_row);
+              f_t pivot_row_rhs = lp.rhs[pivot_row];
+              work_estimate += pivot_row_inequality.i.size();
+              mir.combine_rows(lp,
+                               Arow,
+                               max_off_bound_var,
+                               pivot_row_inequality,
+                               pivot_row_rhs,
+                               inequality,
+                               inequality_rhs);
+              aggregated_rows.push_back(pivot_row);
+              aggregated_mark[pivot_row] = 1;
+              work_estimate += inequality.i.size() + pivot_row_inequality.i.size();
+            } else {
+              // No potential rows to aggregate
+              break;
+            }
+          }
+        }
+        num_aggregated++;  // Always increase so the loop terminates
+      }
+    }
+
+    if (add_cut) {
+      // We were successful in generating a cut.
+
+      // Set the score of the aggregated rows to zero
+      for (i_t row : aggregated_rows) {
+        score[row] = 0.0;
+      }
+    }
+
+    // Clear the aggregated mark
+    for (i_t row : aggregated_rows) {
+      aggregated_mark[row] = 0;
+    }
+    work_estimate += 2 * aggregated_rows.size();
+    // Clear the aggregated rows
+    aggregated_rows.clear();
+
+    // Set the score of the current row to zero
+    score[i] = 0.0;
+
+    // Re-sort the rows by score
+    // It's possible this could be made more efficient by storing the rows in a data structure
+    // that allows us to:
+    // 1. Get the row with the best score
+    // 2. Get the row with a nonzero in column j that has the best score
+    // 3. Remove the rows that have been aggregated
+    // 4. Remove the current row
+    best_score_last_permutation(score, sorted_indices);
+    work_estimate += score.size() * std::log2(score.size());
+  }
+}
+
+// Generates cuts from simplex tableau rows and adds them to cut_pool_.
+//
+// For every row i whose basic variable basic_list[i] is an INTEGER variable with a value in
+// xstar that is not within settings.integer_tol of an integer, a base equality is requested
+// from tableau_equality_t::generate_base_equality. When that succeeds (status 0):
+//   - if settings.strong_chvatal_gomory_cuts != 0, a strong CG cut is attempted and, when
+//     generate_strong_cg_cut returns 0, added to the pool as CHVATAL_GOMORY;
+//   - if settings.mixed_integer_gomory_cuts != 0, an MIR cut is generated from the base
+//     (in)equality (cut A) and from its negation (cut B), and at most one of the two is added
+//     to the pool as MIXED_INTEGER_GOMORY.
+//
+// Cuts are treated as `cut^T x >= rhs`: a cut counts as violated by xstar when
+// cut.dot(xstar) < rhs.
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types,
+  basis_update_mpf_t<i_t, f_t>& basis_update,
+  const std::vector<f_t>& xstar,
+  const std::vector<i_t>& basic_list,
+  const std::vector<i_t>& nonbasic_list)
+{
+  // Helpers are built once and reused for every tableau row.
+  tableau_equality_t<i_t, f_t> tableau(lp, basis_update, nonbasic_list);
+  mixed_integer_rounding_cut_t<i_t, f_t> mir(lp, settings, new_slacks, xstar);
+  strong_cg_cut_t<i_t, f_t> cg(lp, var_types, xstar);
+
+  for (i_t i = 0; i < lp.num_rows; i++) {
+    sparse_vector_t<i_t, f_t> inequality(lp.num_cols, 0);
+    // Only written by generate_base_equality; read only when it returns 0.
+    f_t inequality_rhs;
+    const i_t j = basic_list[i];
+    // Only rows whose basic variable is a fractional integer variable are candidates.
+    if (var_types[j] != variable_type_t::INTEGER) { continue; }
+    const f_t x_j = xstar[j];
+    if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { continue; }
+    i_t tableau_status = tableau.generate_base_equality(lp,
+                                                        settings,
+                                                        Arow,
+                                                        var_types,
+                                                        basis_update,
+                                                        xstar,
+                                                        basic_list,
+                                                        nonbasic_list,
+                                                        i,
+                                                        inequality,
+                                                        inequality_rhs);
+    if (tableau_status == 0) {
+      // Generate a CG cut
+      const bool generate_cg_cut = settings.strong_chvatal_gomory_cuts != 0;
+      if (generate_cg_cut) {
+        // Try to generate a CG cut
+        // Work on a copy so the base inequality stays intact for the MIR cuts below.
+        sparse_vector_t<i_t, f_t> cg_inequality = inequality;
+        f_t cg_inequality_rhs                   = inequality_rhs;
+        if (fractional_part(inequality_rhs) < 0.5) {
+          // Multiply by -1 to force the fractional part to be greater than 0.5
+          cg_inequality_rhs *= -1;
+          cg_inequality.negate();
+        }
+        sparse_vector_t<i_t, f_t> cg_cut(lp.num_cols, 0);
+        f_t cg_cut_rhs;
+        i_t cg_status = cg.generate_strong_cg_cut(
+          lp, settings, var_types, cg_inequality, cg_inequality_rhs, xstar, cg_cut, cg_cut_rhs);
+        if (cg_status == 0) { cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); }
+      }
+
+      // The remainder of the loop body only produces mixed-integer Gomory cuts.
+      if (settings.mixed_integer_gomory_cuts == 0) { continue; }
+
+      // Given the base inequality, generate a MIR cut
+      sparse_vector_t<i_t, f_t> cut_A(lp.num_cols, 0);
+      f_t cut_A_rhs;
+      i_t mir_status = mir.generate_cut(
+        inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_A, cut_A_rhs);
+      bool A_valid       = false;
+      f_t cut_A_distance = 0.0;
+      if (mir_status == 0) {
+        if (cut_A.i.size() == 0) { continue; }
+        // Express the cut without the slack columns listed in new_slacks.
+        mir.substitute_slacks(lp, Arow, cut_A, cut_A_rhs);
+        if (cut_A.i.size() == 0) {
+          A_valid = false;
+        } else {
+          // Check that the cut is violated
+          f_t dot      = cut_A.dot(xstar);
+          f_t cut_norm = cut_A.norm2_squared();
+          // NOTE(review): this `continue` abandons the whole row, so cut B (from the negated
+          // inequality) is never tried when cut A is not violated -- confirm this is intended.
+          if (dot >= cut_A_rhs) { continue; }
+          // Violation scaled by sqrt(norm2_squared()) so cuts A and B can be compared.
+          cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm);
+          A_valid        = true;
+        }
+      }
+
+      // Negate the base inequality
+      inequality.negate();
+      inequality_rhs *= -1;
+
+      sparse_vector_t<i_t, f_t> cut_B(lp.num_cols, 0);
+      f_t cut_B_rhs;
+
+      mir_status = mir.generate_cut(
+        inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_B, cut_B_rhs);
+      bool B_valid       = false;
+      f_t cut_B_distance = 0.0;
+      if (mir_status == 0) {
+        if (cut_B.i.size() == 0) { continue; }
+        mir.substitute_slacks(lp, Arow, cut_B, cut_B_rhs);
+        if (cut_B.i.size() == 0) {
+          B_valid = false;
+        } else {
+          // Check that the cut is violated
+          f_t dot      = cut_B.dot(xstar);
+          f_t cut_norm = cut_B.norm2_squared();
+          // NOTE(review): this `continue` also discards a valid cut A -- confirm this is intended.
+          if (dot >= cut_B_rhs) { continue; }
+          cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm);
+          B_valid        = true;
+        }
+      }
+
+      // Add cut A only when it is valid and strictly deeper than cut B; otherwise add cut B
+      // when it is valid. At most one cut is added per row.
+      if ((cut_A_distance > cut_B_distance) && A_valid) {
+        cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_A, cut_A_rhs);
+      } else if (B_valid) {
+        cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_B, cut_B_rhs);
+      }
+    }
+  }
+}
+
+// Builds the tableau equality of basis row i,
+//
+//     x_j + a_bar^T x_N == b_bar_[i],      j = basic_list[i],
+//
+// and returns it in `inequality` / `inequality_rhs` as a single sparse vector that contains the
+// nonbasic coefficients plus a unit coefficient for x_j.
+//
+// Parameters:
+//   lp, settings       problem data and solver settings (integer_tol and the logger are used)
+//   Arow               row-wise copy of the constraint matrix
+//   var_types          variable types; the row is rejected unless x_j is INTEGER
+//   basis_update       used to solve B^T u_bar = e_i
+//   xstar              current relaxation solution
+//   basic_list         basic variable of every row
+//   nonbasic_list      only referenced by the optional CHECK_B_TRANSPOSE_SOLVE code path
+//   i                  row of the tableau to extract
+//   inequality         [out] coefficients of the tableau row (written only on success)
+//   inequality_rhs     [out] right-hand side b_bar_[i] (written only on success)
+//
+// Returns 0 on success and -1 when the row is rejected: x_j is not integer, x_j is within
+// integer_tol (or the shallow tolerance 1e-2) of an integer, the computed row does not satisfy
+// the tableau equality at xstar to 1e-6, or the right-hand side has a fractional part outside
+// [0.01, 0.99].
+template <typename i_t, typename f_t>
+i_t tableau_equality_t<i_t, f_t>::generate_base_equality(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<variable_type_t>& var_types,
+  basis_update_mpf_t<i_t, f_t>& basis_update,
+  const std::vector<f_t>& xstar,
+  const std::vector<i_t>& basic_list,
+  const std::vector<i_t>& nonbasic_list,
+  i_t i,
+  sparse_vector_t<i_t, f_t>& inequality,
+  f_t& inequality_rhs)
+{
+  // Let's look for Gomory cuts
+  const i_t j = basic_list[i];
+  if (var_types[j] != variable_type_t::INTEGER) { return -1; }
+  const f_t x_j = xstar[j];
+  if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { return -1; }
+#ifdef PRINT_CUT_INFO
+  // Uses the `settings` parameter, like every other log call in this function.
+  settings.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i);
+#endif
+
+  // Solve B^T u_bar = e_i
+  sparse_vector_t<i_t, f_t> e_i(lp.num_rows, 1);
+  e_i.i[0] = i;
+  e_i.x[0] = 1.0;
+  sparse_vector_t<i_t, f_t> u_bar(lp.num_rows, 0);
+  basis_update.b_transpose_solve(e_i, u_bar);
+
+#ifdef CHECK_B_TRANSPOSE_SOLVE
+  std::vector<f_t> u_bar_dense(lp.num_rows);
+  u_bar.to_dense(u_bar_dense);
+
+  std::vector<f_t> BTu_bar(lp.num_rows);
+  b_transpose_multiply(lp, basic_list, u_bar_dense, BTu_bar);
+  for (i_t k = 0; k < lp.num_rows; k++) {
+    if (k == i) {
+      settings.log.printf("BTu_bar %d error %e\n", k, std::abs(BTu_bar[k] - 1.0));
+      if (std::abs(BTu_bar[k] - 1.0) > 1e-10) {
+        settings.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
+        assert(false);
+      }
+    } else {
+      settings.log.printf("BTu_bar %d error %e\n", k, std::abs(BTu_bar[k]));
+      if (std::abs(BTu_bar[k]) > 1e-10) {
+        settings.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
+        assert(false);
+      }
+    }
+  }
+#endif
+
+  // Compute a_bar = N^T u_bar
+  // TODO: This is similar to a function in phase2 of dual simplex. See if it can be reused.
+  const i_t nz_ubar = u_bar.i.size();
+  std::vector<i_t> abar_indices;
+  abar_indices.reserve(nz_ubar);
+  for (i_t k = 0; k < nz_ubar; k++) {
+    const i_t ii        = u_bar.i[k];
+    const f_t u_bar_i   = u_bar.x[k];
+    const i_t row_start = Arow.row_start[ii];
+    const i_t row_end   = Arow.row_start[ii + 1];
+    for (i_t p = row_start; p < row_end; p++) {
+      const i_t jj = Arow.j[p];
+      if (nonbasic_mark_[jj] == 1) {
+        // Kahan (compensated) summation: c_workspace_ carries the running rounding error of
+        // the sum accumulated in x_workspace_. The statement order below must not change.
+        const f_t val    = u_bar_i * Arow.x[p];
+        const f_t y      = val - c_workspace_[jj];
+        const f_t t      = x_workspace_[jj] + y;
+        c_workspace_[jj] = (t - x_workspace_[jj]) - y;
+        x_workspace_[jj] = t;
+        if (!x_mark_[jj]) {
+          x_mark_[jj] = 1;
+          abar_indices.push_back(jj);
+        }
+      }
+    }
+  }
+  // TODO: abar has lots of small coefficients. Double check that
+  // we do not accidentally create a base (in)equality
+  // that cuts off an integer solution, when we drop the small coefficients.
+
+  i_t small_coeff              = 0;
+  const f_t drop_tol           = 1e-12;
+  const bool drop_coefficients = true;
+  sparse_vector_t<i_t, f_t> a_bar(lp.num_cols, 0);
+  // +1 leaves room for the basic variable x_j appended below.
+  a_bar.i.reserve(abar_indices.size() + 1);
+  a_bar.x.reserve(abar_indices.size() + 1);
+  for (const i_t jj : abar_indices) {
+    if (drop_coefficients && std::abs(x_workspace_[jj]) < drop_tol) {
+      small_coeff++;
+    } else {
+      a_bar.i.push_back(jj);
+      a_bar.x.push_back(x_workspace_[jj]);
+    }
+  }
+  const bool verbose = false;
+  if (verbose && small_coeff > 0) { settings.log.printf("Small coeff dropped %d\n", small_coeff); }
+
+  // Clear the workspace
+  for (i_t jj : abar_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj]      = 0;
+    c_workspace_[jj] = 0.0;
+  }
+  abar_indices.clear();
+
+  // We should now have the base inequality
+  // x_j + a_bar^T x_N >= b_bar_i
+  // We add x_j into a_bar so that everything is in a single sparse_vector_t
+  a_bar.i.push_back(j);
+  a_bar.x.push_back(1.0);
+
+  // Check that the tableau equality is satisfied
+  const f_t tableau_tol = 1e-6;
+  f_t a_bar_dot_xstar   = a_bar.dot(xstar);
+  if (std::abs(a_bar_dot_xstar - b_bar_[i]) > tableau_tol) {
+    settings.log.debug("bad tableau equality. error %e\n", std::abs(a_bar_dot_xstar - b_bar_[i]));
+    return -1;
+  }
+
+  // We have that x_j + a_bar^T x_N == b_bar_i
+  // So x_j + a_bar^T x_N >= b_bar_i
+  // And x_j + a_bar^T x_N <= b_bar_i
+  // Or -x_j - a_bar^T x_N >= -b_bar_i
+
+  // Skip cuts that are shallow
+  const f_t shallow_tol = 1e-2;
+  if (std::abs(x_j - std::round(x_j)) < shallow_tol) {
+    // Skip cuts where integer variable has small fractional part
+    return -1;
+  }
+
+  const f_t f_val = b_bar_[i] - std::floor(b_bar_[i]);
+  if (f_val < 0.01 || f_val > 0.99) {
+    // Skip cuts where the rhs has a small fractional part
+    return -1;
+  }
+
+#ifdef PRINT_BASE_INEQUALITY
+  // Print out the base inequality
+  for (i_t k = 0; k < static_cast<i_t>(a_bar.i.size()); k++) {
+    const f_t aj = a_bar.x[k];
+    settings.log.printf("a_bar[%d] = %e\n", k, aj);
+  }
+  settings.log.printf("b_bar[%d] = %e\n", i, b_bar_[i]);
+#endif
+
+  inequality     = a_bar;
+  inequality_rhs = b_bar_[i];
+
+  return 0;
+}
+
+// Sets up the per-variable bookkeeping used by the MIR routines.
+//
+// For each column in new_slacks the constructor flags it in is_slack_ and stores, in
+// slack_rows_, the row index of the first nonzero of that column. For every variable it then
+// decides which bound (if any) the variable is measured from:
+//   bound_info_[j] ==  1 / has_upper_[j] : finite upper bound that is at least as close to
+//                                          xstar[j] as the lower bound;
+//   bound_info_[j] == -1 / has_lower_[j] : otherwise, a nonzero lower bound (which must be
+//                                          finite when the upper bound is not);
+//   bound_info_[j] ==  0                 : neither applies.
+// needs_complement_ becomes true as soon as any variable gets a nonzero bound_info_.
+template <typename i_t, typename f_t>
+mixed_integer_rounding_cut_t<i_t, f_t>::mixed_integer_rounding_cut_t(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<f_t>& xstar)
+  : num_vars_(lp.num_cols),
+    settings_(settings),
+    x_workspace_(num_vars_, 0.0),
+    x_mark_(num_vars_, 0),
+    has_lower_(num_vars_, 0),
+    has_upper_(num_vars_, 0),
+    is_slack_(num_vars_, 0),
+    slack_rows_(num_vars_, 0),
+    bound_info_(num_vars_, 0)
+{
+  // Slack bookkeeping: remember which row each slack column belongs to.
+  for (const i_t slack : new_slacks) {
+    is_slack_[slack]   = 1;
+    const i_t first_nz = lp.A.col_start[slack];
+    slack_rows_[slack] = lp.A.i[first_nz];
+    assert(std::abs(lp.A.x[first_nz]) == 1.0);
+  }
+
+  needs_complement_ = false;
+  for (i_t var = 0; var < num_vars_; ++var) {
+    if (lp.lower[var] < 0) {
+      settings_.log.debug("Variable %d has negative lower bound %e\n", var, lp.lower[var]);
+    }
+    const f_t hi    = lp.upper[var];
+    const f_t lo    = lp.lower[var];
+    const f_t value = xstar[var];
+
+    const bool hi_finite = hi < inf;
+    // The upper bound wins when it is finite and no farther from the point than the lower one.
+    const bool use_upper = hi_finite && (hi - value <= value - lo);
+    // Otherwise a nonzero lower bound is used; without a finite upper bound it must be finite.
+    const bool use_lower = !use_upper && lo != 0.0 && (hi_finite || lo > -inf);
+
+    if (use_upper) {
+      has_upper_[var]   = 1;
+      bound_info_[var]  = 1;
+      needs_complement_ = true;
+    } else if (use_lower) {
+      has_lower_[var]   = 1;
+      bound_info_[var]  = -1;
+      needs_complement_ = true;
+    }
+  }
+}
+
+// Rewrites `inequality` / `rhs` in place in terms of the shifted or complemented variables
+// selected by bound_info_:
+//   bound_info_[j] == -1 : x_j = v_j + l_j. The coefficient is kept and a_j * l_j moves to
+//                          the right-hand side.
+//   bound_info_[j] ==  1 : x_j = u_j - w_j. The coefficient changes sign and a_j * u_j moves
+//                          to the right-hand side.
+// Entries with any other bound_info_ value are left untouched.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::to_nonnegative(const lp_problem_t<i_t, f_t>& lp,
+                                                            sparse_vector_t<i_t, f_t>& inequality,
+                                                            f_t& rhs)
+{
+  const i_t count = inequality.i.size();
+  for (i_t pos = 0; pos < count; ++pos) {
+    const i_t var   = inequality.i[pos];
+    const f_t coeff = inequality.x[pos];
+    if (bound_info_[var] == 1) {
+      // a_j x_j = a_j (u_j - w_j) = -a_j w_j + a_j u_j
+      inequality.x[pos] = -coeff;
+      rhs -= coeff * lp.upper[var];
+    } else if (bound_info_[var] == -1) {
+      // a_j x_j = a_j (v_j + l_j) = a_j v_j + a_j l_j
+      rhs -= coeff * lp.lower[var];
+    }
+  }
+}
+
+// Copies xstar into xstar_nonnegative and applies the variable change selected by
+// bound_info_ to the first lp.num_cols entries:
+//   bound_info_[j] == -1 : entry becomes x_j - l_j
+//   bound_info_[j] ==  1 : entry becomes u_j - x_j
+// All other entries keep their value from xstar.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::relaxation_to_nonnegative(
+  const lp_problem_t<i_t, f_t>& lp,
+  const std::vector<f_t>& xstar,
+  std::vector<f_t>& xstar_nonnegative)
+{
+  xstar_nonnegative = xstar;
+  for (i_t col = 0; col < lp.num_cols; ++col) {
+    f_t& shifted = xstar_nonnegative[col];
+    if (bound_info_[col] == 1) {
+      // w_j = u_j - x_j
+      shifted = lp.upper[col] - shifted;
+    } else if (bound_info_[col] == -1) {
+      // v_j = x_j - l_j
+      shifted -= lp.lower[col];
+    }
+  }
+}
+
+// Rewrites `inequality` / `rhs` (a `>=` inequality over the shifted or complemented
+// variables) in place so that it is expressed in the original variables:
+//   bound_info_[j] == -1 : v_j = x_j - l_j. The coefficient is kept and d_j * l_j is added
+//                          to the right-hand side.
+//   bound_info_[j] ==  1 : w_j = u_j - x_j. The coefficient changes sign and d_j * u_j is
+//                          subtracted from the right-hand side.
+// Entries with any other bound_info_ value are left untouched.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::to_original(const lp_problem_t<i_t, f_t>& lp,
+                                                         sparse_vector_t<i_t, f_t>& inequality,
+                                                         f_t& rhs)
+{
+  const i_t count = inequality.i.size();
+  for (i_t pos = 0; pos < count; ++pos) {
+    const i_t var   = inequality.i[pos];
+    const f_t coeff = inequality.x[pos];
+    if (bound_info_[var] == 1) {
+      // d_j w_j = d_j (u_j - x_j)  =>  -d_j x_j >= ... - d_j u_j
+      inequality.x[pos] = -coeff;
+      rhs -= coeff * lp.upper[var];
+    } else if (bound_info_[var] == -1) {
+      // d_j v_j = d_j (x_j - l_j)  =>  d_j x_j >= ... + d_j l_j
+      rhs += coeff * lp.lower[var];
+    }
+  }
+}
+
+// Removes coefficients with magnitude below 1e-6 from a cut of the form
+//
+//     cut^T x >= cut_rhs
+//
+// while keeping the cut valid. A small term a_j * x_j is replaced by its largest possible
+// value over the variable's bounds, which is moved to the right-hand side:
+//   a_j >= 0 and u_j finite :  a_j x_j <= a_j u_j   =>  cut_rhs -= a_j * u_j
+//   a_j <= 0 and l_j finite :  a_j x_j <= a_j l_j   =>  cut_rhs -= a_j * l_j
+// A small coefficient whose relevant bound is infinite cannot be removed safely and is kept.
+//
+// Parameters:
+//   lower_bounds, upper_bounds  variable bounds, indexed by variable
+//   cut                         [in/out] cut coefficients; zeroed entries are squeezed out
+//   cut_rhs                     [in/out] right-hand side of the cut
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::remove_small_coefficients(
+  const std::vector<f_t>& lower_bounds,
+  const std::vector<f_t>& upper_bounds,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  i_t removed = 0;
+  for (i_t k = 0; k < cut.i.size(); k++) {
+    const i_t j = cut.i[k];
+
+    // Check for small coefficients
+    const f_t aj = cut.x[k];
+    if (!(std::abs(aj) < 1e-6)) { continue; }
+
+    if (aj >= 0.0 && upper_bounds[j] < inf) {
+      // Move this to the right-hand side
+      cut_rhs -= aj * upper_bounds[j];
+    } else if (aj <= 0.0 && lower_bounds[j] > -inf) {
+      // The sign must be the same as in the upper-bound case: adding a_j * l_j instead
+      // would over-tighten the cut whenever l_j < 0.
+      cut_rhs -= aj * lower_bounds[j];
+    } else {
+      // No finite bound to relax against: keep the coefficient.
+      continue;
+    }
+    cut.x[k] = 0.0;
+    removed++;
+  }
+
+  if (removed > 0) {
+    // Drop the entries that were zeroed above.
+    sparse_vector_t<i_t, f_t> new_cut(cut.n, 0);
+    cut.squeeze(new_cut);
+    cut = new_cut;
+  }
+}
+
+// Builds a mixed-integer rounding cut from the base inequality a^T x >= beta, without using
+// any variable bounds.
+//
+// With frac(q) = q - floor(q), the cut coefficient of entry (j, a_j) is
+//   INTEGER variable :  min(frac(a_j), frac(beta)) + frac(beta) * floor(a_j)
+//   otherwise        :  max(a_j, 0)
+// and the right-hand side is frac(beta) * ceil(beta). The result is the cut
+// `cut^T x >= cut_rhs`, with entries sorted by cut.sort().
+//
+// Returns 0 on success and -1 when the resulting cut has no entries.
+template <typename i_t, typename f_t>
+i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut_nonnegative(
+  const sparse_vector_t<i_t, f_t>& a,
+  f_t beta,
+  const std::vector<variable_type_t>& var_types,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  // Rounding function applied to the coefficients of integer variables.
+  auto integer_coeff = [](f_t q_1, f_t q_2) -> f_t {
+    const f_t floor_q_1 = std::floor(q_1);
+    const f_t q_1_hat   = q_1 - floor_q_1;
+    const f_t q_2_hat   = q_2 - std::floor(q_2);
+    return std::min(q_1_hat, q_2_hat) + q_2_hat * floor_q_1;
+  };
+
+  // Positive part, applied to the coefficients of the remaining variables.
+  auto positive_part = [](f_t q) -> f_t { return std::max(q, 0.0); };
+
+  const f_t R = (beta - std::floor(beta)) * std::ceil(beta);
+
+  // Accumulate the coefficients in the dense workspace, remembering which entries were touched.
+  std::vector<i_t> touched;
+  touched.reserve(a.i.size());
+  for (i_t k = 0; k < a.i.size(); k++) {
+    const i_t var      = a.i[k];
+    const f_t coeff    = a.x[k];
+    const bool integer = var_types[var] == variable_type_t::INTEGER;
+    x_workspace_[var] += integer ? integer_coeff(coeff, beta) : positive_part(coeff);
+    if (!x_mark_[var] && x_workspace_[var] != 0.0) {
+      x_mark_[var] = 1;
+      touched.push_back(var);
+    }
+  }
+
+  // Gather the nonzero coefficients into the output cut.
+  cut.i.clear();
+  cut.x.clear();
+  cut.i.reserve(touched.size());
+  cut.x.reserve(touched.size());
+  for (const i_t var : touched) {
+    cut.i.push_back(var);
+    cut.x.push_back(x_workspace_[var]);
+  }
+
+  // Clear the workspace
+  for (const i_t var : touched) {
+    x_workspace_[var] = 0.0;
+    x_mark_[var]      = 0;
+  }
+
+#ifdef CHECK_WORKSPACE
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      assert(x_workspace_[j] == 0.0);
+    }
+    if (x_mark_[j] != 0) {
+      printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      assert(x_mark_[j] == 0);
+    }
+  }
+#endif
+
+  // The new cut is: g'*x >= R
+  cut.sort();
+  cut_rhs = R;
+
+#ifdef CHECK_REPEATED_INDICES
+  // Check for repeated indices
+  std::vector<i_t> check(num_vars_, 0);
+  for (i_t p = 0; p < cut.i.size(); p++) {
+    if (check[cut.i[p]] != 0) {
+      printf("repeated index in generated cut\n");
+      assert(check[cut.i[p]] == 0);
+    }
+    check[cut.i[p]] = 1;
+  }
+#endif
+
+  return (cut.i.size() == 0) ? -1 : 0;
+}
+
+// Builds a mixed-integer rounding cut from the base inequality a^T x >= beta.
+//
+// When no variable needs a bound substitution (needs_complement_ is false) the plain MIR
+// formula is used. Otherwise variables flagged in has_upper_ are measured from their upper
+// bound and variables flagged in has_lower_ from their lower bound; the formula is applied to
+// the substituted inequality and the result is expressed directly in the original variables.
+//
+// Coefficients with magnitude below 1e-6 are dropped when the variable has a finite bound on
+// the relevant side; the term is replaced by its largest possible value, so the cut stays
+// valid.
+//
+// Parameters:
+//   a, beta        base inequality a^T x >= beta
+//   upper_bounds   variable upper bounds (note: upper bounds come BEFORE lower bounds)
+//   lower_bounds   variable lower bounds
+//   var_types      variable types; INTEGER variables get the rounded coefficient
+//   cut            [out] cut coefficients, sorted by cut.sort()
+//   cut_rhs        [out] right-hand side; the cut reads cut^T x >= cut_rhs
+//
+// Returns 0 on success and -1 when the resulting cut has no entries.
+template <typename i_t, typename f_t>
+i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
+  const sparse_vector_t<i_t, f_t>& a,
+  f_t beta,
+  const std::vector<f_t>& upper_bounds,
+  const std::vector<f_t>& lower_bounds,
+  const std::vector<variable_type_t>& var_types,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+#ifdef CHECK_WORKSPACE
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("Before generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      printf("num_vars_ %d\n", num_vars_);
+      printf("x_workspace_.size() %ld\n", x_workspace_.size());
+      assert(x_workspace_[j] == 0.0);
+    }
+    if (x_mark_[j] != 0) {
+      printf("Before generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      assert(x_mark_[j] == 0);
+    }
+  }
+#endif
+
+  // Rounding function applied to the coefficients of integer variables:
+  // min(frac(q_1), frac(q_2)) + frac(q_2) * floor(q_1)
+  auto f = [](f_t q_1, f_t q_2) -> f_t {
+    f_t q_1_hat = q_1 - std::floor(q_1);
+    f_t q_2_hat = q_2 - std::floor(q_2);
+    return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1);
+  };
+
+  // Positive part, applied to the coefficients of continuous variables.
+  auto h = [](f_t q) -> f_t { return std::max(q, 0.0); };
+
+  // Variables whose workspace entry has been touched, in order of first touch.
+  std::vector<i_t> cut_indices;
+  cut_indices.reserve(a.i.size());
+  f_t R;
+  if (!needs_complement_) {
+    R = (beta - std::floor(beta)) * std::ceil(beta);
+
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      f_t aj       = a.x[k];
+      if (var_types[jj] == variable_type_t::INTEGER) {
+        x_workspace_[jj] += f(aj, beta);
+        if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+          x_mark_[jj] = 1;
+          cut_indices.push_back(jj);
+        }
+      } else {
+        x_workspace_[jj] += h(aj);
+        if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+          x_mark_[jj] = 1;
+          cut_indices.push_back(jj);
+        }
+      }
+    }
+  } else {
+    // Compute r: the right-hand side of the base inequality after the bound substitutions.
+    f_t r = beta;
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      if (has_upper_[jj]) {
+        const f_t uj = upper_bounds[jj];
+        r -= uj * a.x[k];
+        continue;
+      }
+      if (has_lower_[jj]) {
+        const f_t lj = lower_bounds[jj];
+        r -= lj * a.x[k];
+      }
+    }
+
+    // Compute R: the MIR right-hand side, with the bound terms moved back so that the cut is
+    // stated in the original variables.
+    R = std::ceil(r) * (r - std::floor(r));
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      const f_t aj = a.x[k];
+      if (has_upper_[jj]) {
+        const f_t uj = upper_bounds[jj];
+        if (var_types[jj] == variable_type_t::INTEGER) {
+          R -= f(-aj, r) * uj;
+        } else {
+          R -= h(-aj) * uj;
+        }
+      } else if (has_lower_[jj]) {
+        const f_t lj = lower_bounds[jj];
+        if (var_types[jj] == variable_type_t::INTEGER) {
+          R += f(aj, r) * lj;
+        } else {
+          R += h(aj) * lj;
+        }
+      }
+    }
+
+    // Compute the cut coefficients
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      const f_t aj = a.x[k];
+      if (has_upper_[jj]) {
+        if (var_types[jj] == variable_type_t::INTEGER) {
+          // Upper intersect I
+          x_workspace_[jj] -= f(-aj, r);
+          if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+            x_mark_[jj] = 1;
+            cut_indices.push_back(jj);
+          }
+        } else {
+          // Upper intersect C
+          f_t h_j = h(-aj);
+          if (h_j != 0.0) {
+            x_workspace_[jj] -= h_j;
+            if (!x_mark_[jj]) {
+              x_mark_[jj] = 1;
+              cut_indices.push_back(jj);
+            }
+          }
+        }
+      } else if (var_types[jj] == variable_type_t::INTEGER) {
+        // I \ Upper
+        x_workspace_[jj] += f(aj, r);
+        if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+          x_mark_[jj] = 1;
+          cut_indices.push_back(jj);
+        }
+      } else {
+        // C \ Upper
+        f_t h_j = h(aj);
+        if (h_j != 0.0) {
+          x_workspace_[jj] += h_j;
+          if (!x_mark_[jj]) {
+            x_mark_[jj] = 1;
+            cut_indices.push_back(jj);
+          }
+        }
+      }
+    }
+  }
+
+  cut.i.reserve(cut_indices.size());
+  cut.x.reserve(cut_indices.size());
+  cut.i.clear();
+  cut.x.clear();
+  for (const i_t jj : cut_indices) {
+    // Check for small coefficients
+    const f_t aj = x_workspace_[jj];
+    if (std::abs(aj) < 1e-6) {
+      // For the cut g'x >= R a dropped term a_j x_j must be replaced by its largest possible
+      // value, which is then moved to the right-hand side.
+      if (aj >= 0.0 && upper_bounds[jj] < inf) {
+        // a_j x_j <= a_j u_j
+        R -= aj * upper_bounds[jj];
+        continue;
+      } else if (aj <= 0.0 && lower_bounds[jj] > -inf) {
+        // a_j x_j <= a_j l_j. The sign must match the upper-bound case: adding a_j * l_j
+        // instead would over-tighten the cut whenever l_j < 0.
+        R -= aj * lower_bounds[jj];
+        continue;
+      }
+      // No finite bound on the relevant side: keep the small coefficient.
+    }
+    cut.i.push_back(jj);
+    cut.x.push_back(x_workspace_[jj]);
+  }
+
+  // Clear the workspace
+  for (i_t jj : cut_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj]      = 0;
+  }
+
+#ifdef CHECK_WORKSPACE
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      assert(x_workspace_[j] == 0.0);
+    }
+    if (x_mark_[j] != 0) {
+      printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      assert(x_mark_[j] == 0);
+    }
+  }
+#endif
+
+  // The new cut is: g'*x >= R
+  cut.sort();
+
+  cut_rhs = R;
+
+#ifdef CHECK_REPEATED_INDICES
+  // Check for repeated indices
+  std::vector<i_t> check(num_vars_, 0);
+  for (i_t p = 0; p < cut.i.size(); p++) {
+    if (check[cut.i[p]] != 0) {
+      printf("repeated index in generated cut\n");
+      assert(check[cut.i[p]] == 0);
+    }
+    check[cut.i[p]] = 1;
+  }
+#endif
+
+  if (cut.i.size() == 0) { return -1; }
+
+  return 0;
+}
+
+// Eliminates slack variables from the cut `cut^T x >= cut_rhs` so that it only involves the
+// remaining variables.
+//
+// Every slack s_j flagged in is_slack_ is replaced using its defining row i = slack_rows_[j],
+//     sum_{h != j} A(i, h) x_h + alpha * s_j = rhs_i,    alpha = +1 or -1,
+// that is, s_j = (rhs_i - sum_{h != j} A(i, h) x_h) / alpha.
+//
+// If the cut contains no slack it is left unchanged. Otherwise it is rebuilt from the
+// accumulated coefficients, coefficients with magnitude below 1e-6 are dropped against a
+// finite variable bound (adjusting cut_rhs so the cut stays valid), and the cut is sorted.
+//
+// Parameters:
+//   lp        problem data (column-wise matrix A, rhs, bounds)
+//   Arow      row-wise copy of the constraint matrix
+//   cut       [in/out] cut coefficients
+//   cut_rhs   [in/out] right-hand side of the cut
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_t<i_t, f_t>& lp,
+                                                               csr_matrix_t<i_t, f_t>& Arow,
+                                                               sparse_vector_t<i_t, f_t>& cut,
+                                                               f_t& cut_rhs)
+{
+  // Remove slacks from the cut
+  // So that the cut is only over the original variables
+  bool found_slack = false;
+  // Variables whose workspace entry has been touched, in order of first touch.
+  std::vector<i_t> cut_indices;
+  cut_indices.reserve(cut.i.size());
+
+#ifdef CHECK_WORKSPACE
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("Begin Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      assert(x_workspace_[j] == 0.0);
+    }
+    if (x_mark_[j] != 0) {
+      printf("Begin Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      assert(x_mark_[j] == 0);
+    }
+  }
+#endif
+
+  for (i_t k = 0; k < cut.i.size(); k++) {
+    const i_t j  = cut.i[k];
+    const f_t cj = cut.x[k];
+    if (is_slack_[j]) {
+      found_slack           = true;
+      const i_t slack_start = lp.A.col_start[j];
+#ifdef CHECK_SLACKS
+      const i_t slack_end = lp.A.col_start[j + 1];
+      const i_t slack_len = slack_end - slack_start;
+      if (slack_len != 1) {
+        printf("Slack %d has %d nzs in colum\n", j, slack_len);
+        assert(slack_len == 1);
+      }
+#endif
+      const f_t alpha = lp.A.x[slack_start];
+#ifdef CHECK_SLACKS
+      if (std::abs(alpha) != 1.0) {
+        printf("Slack %d has non-unit coefficient %e\n", j, alpha);
+        assert(std::abs(alpha) == 1.0);
+      }
+#endif
+
+      // Do the substitution
+      // Slack variable s_j participates in row i of the constraint matrix
+      // Row i is of the form:
+      // sum_{k != j} A(i, k) * x_k + alpha * s_j = rhs_i
+      // where alpha = +1/-1
+      // So we have that
+      // s_j = (rhs_i - sum_{k != j} A(i, k) * x_k)/alpha
+
+      // Our cut is of the form:
+      // sum_{k != j} C(k) * x_k + C(j) * s_j >= cut_rhs
+      // So the cut becomes
+      // sum_{k != j} C(k) * x_k + C(j)/alpha * (rhs_i - sum_{h != j} A(i, h) * x_h) >= cut_rhs
+      // This is equivalent to:
+      // sum_{k != j} C(k) * x_k + sum_{h != j} -C(j)/alpha * A(i, h) * x_h
+      //   >= cut_rhs - C(j)/alpha * rhs_i
+      const i_t i = slack_rows_[j];
+      cut_rhs -= cj * lp.rhs[i] / alpha;
+      const i_t row_start = Arow.row_start[i];
+      const i_t row_end   = Arow.row_start[i + 1];
+      for (i_t q = row_start; q < row_end; q++) {
+        const i_t h = Arow.j[q];
+        if (h != j) {
+          const f_t aih = Arow.x[q];
+          x_workspace_[h] -= cj * aih / alpha;
+          if (!x_mark_[h]) {
+            x_mark_[h] = 1;
+            cut_indices.push_back(h);
+          }
+        } else {
+          const f_t aij = Arow.x[q];
+          if (std::abs(aij) != 1.0) {
+            settings_.log.printf(
+              "Slack row %d has non-unit coefficient %e for variable %d\n", i, aij, j);
+            assert(std::abs(aij) == 1.0);
+          }
+        }
+      }
+
+    } else {
+      // Non-slack entries are accumulated as they are.
+      x_workspace_[j] += cj;
+      if (!x_mark_[j]) {
+        x_mark_[j] = 1;
+        cut_indices.push_back(j);
+      }
+    }
+  }
+
+  if (found_slack) {
+    cut.i.reserve(cut_indices.size());
+    cut.x.reserve(cut_indices.size());
+    cut.i.clear();
+    cut.x.clear();
+
+    for (const i_t j : cut_indices) {
+      // Check for small coefficients
+      const f_t aj = x_workspace_[j];
+      if (std::abs(aj) < 1e-6) {
+        // For the cut c'x >= cut_rhs a dropped term a_j x_j must be replaced by its largest
+        // possible value, which is then moved to the right-hand side.
+        if (aj >= 0.0 && lp.upper[j] < inf) {
+          // a_j x_j <= a_j u_j
+          cut_rhs -= aj * lp.upper[j];
+          continue;
+        } else if (aj <= 0.0 && lp.lower[j] > -inf) {
+          // a_j x_j <= a_j l_j. The sign must match the upper-bound case: adding a_j * l_j
+          // instead would over-tighten the cut whenever l_j < 0.
+          cut_rhs -= aj * lp.lower[j];
+          continue;
+        }
+        // No finite bound on the relevant side: keep the small coefficient.
+      }
+
+      cut.i.push_back(j);
+      cut.x.push_back(x_workspace_[j]);
+    }
+    // Sort the cut
+    cut.sort();
+  }
+
+  // Clear the workspace
+  for (i_t jj : cut_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj]      = 0;
+  }
+
+#ifdef CHECK_WORKSPACE
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("End Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      assert(x_workspace_[j] == 0.0);
+    }
+    if (x_mark_[j] != 0) {
+      printf("End Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      assert(x_mark_[j] == 0);
+    }
+  }
+#endif
+}
+
+// Returns cut_rhs - cut.dot(xstar) for the cut `cut^T x >= cut_rhs`. The value is positive
+// exactly when xstar violates the cut.
+template <typename i_t, typename f_t>
+f_t mixed_integer_rounding_cut_t<i_t, f_t>::compute_violation(const sparse_vector_t<i_t, f_t>& cut,
+                                                              f_t cut_rhs,
+                                                              const std::vector<f_t>& xstar)
+{
+  const f_t activity = cut.dot(xstar);
+  return cut_rhs - activity;
+}
+
+// Eliminates variable xj from `inequality` by subtracting a multiple of `pivot_row`:
+//   inequality     <- inequality     - (a_i_j / a_l_j) * pivot_row
+//   inequality_rhs <- inequality_rhs - (a_i_j / a_l_j) * pivot_row_rhs
+// where a_i_j and a_l_j are the coefficients of xj in the inequality and in the pivot row.
+// Nothing is modified when the pivot row has no nonzero coefficient on xj.
+// The parameters lp and Arow are not referenced by this routine.
+// x_workspace_, x_mark_ and indices_ are used as scratch space; every entry touched here
+// is reset to zero before returning.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::combine_rows(
+  const lp_problem_t<i_t, f_t>& lp,
+  csr_matrix_t<i_t, f_t>& Arow,
+  i_t xj,
+  const sparse_vector_t<i_t, f_t>& pivot_row,
+  f_t pivot_row_rhs,
+  sparse_vector_t<i_t, f_t>& inequality,
+  f_t& inequality_rhs)
+{
+#ifdef CHECK_WORKSPACE
+  // The scratch arrays must be clean on entry
+  for (i_t idx = 0; idx < x_workspace_.size(); idx++) {
+    if (x_workspace_[idx] != 0.0) {
+      printf("Dirty x_workspace_[%d] = %e\n", idx, x_workspace_[idx]);
+      assert(x_workspace_[idx] == 0.0);
+    }
+    if (x_mark_[idx] != 0) {
+      printf("Dirty x_mark_[%d] = %d\n", idx, x_mark_[idx]);
+      assert(x_mark_[idx] == 0);
+    }
+  }
+#endif
+
+  const size_t pivot_nz = pivot_row.i.size();
+  const size_t ineq_nz  = inequality.i.size();
+
+  indices_.clear();
+  indices_.reserve(pivot_nz + ineq_nz);
+
+  // Look up the pivot row's coefficient on xj (the first occurrence is used)
+  f_t pivot_coeff = 0.0;
+  for (size_t pos = 0; pos < pivot_nz; pos++) {
+    if (pivot_row.i[pos] == xj) {
+      pivot_coeff = pivot_row.x[pos];
+      break;
+    }
+  }
+
+  // Without a pivot element there is nothing to eliminate with
+  if (pivot_coeff == 0) { return; }
+
+  // Scatter the inequality into the workspace, holding back the coefficient on xj
+  f_t ineq_coeff = 0.0;
+  for (size_t pos = 0; pos < ineq_nz; pos++) {
+    const i_t col = inequality.i[pos];
+    if (col == xj) {
+      ineq_coeff = inequality.x[pos];
+      continue;
+    }
+    x_workspace_[col] = inequality.x[pos];
+    x_mark_[col]      = 1;
+    indices_.push_back(col);
+  }
+
+  const f_t multiplier = ineq_coeff / pivot_coeff;
+
+  // Right-hand side update
+  inequality_rhs -= multiplier * pivot_row_rhs;
+
+  // Subtract the scaled pivot row, registering columns that were not yet present
+  for (size_t pos = 0; pos < pivot_nz; pos++) {
+    const i_t col = pivot_row.i[pos];
+    if (col == xj) { continue; }
+    x_workspace_[col] -= multiplier * pivot_row.x[pos];
+    if (!x_mark_[col]) {
+      x_mark_[col] = 1;
+      indices_.push_back(col);
+    }
+  }
+
+  // Gather the combined row back into the inequality (xj no longer appears)
+  const size_t out_nz = indices_.size();
+  inequality.i.resize(out_nz);
+  inequality.x.resize(out_nz);
+  for (size_t pos = 0; pos < out_nz; pos++) {
+    const i_t col     = indices_[pos];
+    inequality.i[pos] = col;
+    inequality.x[pos] = x_workspace_[col];
+  }
+
+#ifdef CHECK_REPEATED_INDICES
+  // Each column may appear at most once in the result
+  std::vector<i_t> seen(num_vars_, 0);
+  for (i_t pos = 0; pos < inequality.i.size(); pos++) {
+    if (seen[inequality.i[pos]] == 1) {
+      printf("repeated index\n");
+      assert(seen[inequality.i[pos]] == 0);
+    }
+    seen[inequality.i[pos]] = 1;
+  }
+#endif
+
+  // Restore the scratch arrays for the next caller
+  for (i_t col : indices_) {
+    x_workspace_[col] = 0.0;
+    x_mark_[col]      = 0;
+  }
+  indices_.clear();
+}
+
+// Chooses, for every integer variable with a nonzero lower bound, which bound it will be
+// measured from when inequalities are rewritten in terms of nonnegative variables.
+// transformed_variables_[j] is
+//    0  no substitution (the initial value; kept for continuous variables and for
+//       integer variables whose lower bound is exactly zero),
+//   -1  substitute relative to the lower bound,
+//   +1  substitute relative to the (finite) upper bound.
+// The bound nearer to xstar[j] is chosen; ties and infinite upper bounds select the
+// lower bound.
+template <typename i_t, typename f_t>
+strong_cg_cut_t<i_t, f_t>::strong_cg_cut_t(const lp_problem_t<i_t, f_t>& lp,
+                                           const std::vector<variable_type_t>& var_types,
+                                           const std::vector<f_t>& xstar)
+  : transformed_variables_(lp.num_cols, 0)
+{
+  for (i_t col = 0; col < lp.num_cols; col++) {
+    if (var_types[col] != variable_type_t::INTEGER) { continue; }
+
+    const f_t lo = lp.lower[col];
+    // A zero lower bound means the variable is already in the desired form
+    if (lo == 0.0) { continue; }
+
+    const f_t hi        = lp.upper[col];
+    const f_t gap_below = std::max(0.0, xstar[col] - lo);
+    const f_t gap_above = std::max(0.0, hi - xstar[col]);
+
+    const bool prefer_lower = (gap_above >= gap_below) || (hi >= inf);
+    if (prefer_lower) {
+      // xstar is at least as close to the lower bound, or no finite upper bound exists
+      transformed_variables_[col] = -1;
+    } else if (hi < inf) {
+      // xstar is strictly closer to the finite upper bound
+      transformed_variables_[col] = 1;
+    }
+  }
+}
+
+// Rewrites an inequality sum_j a_j x_j <= rhs so that it contains only integer variables,
+// each expressed as a nonnegative quantity.
+//
+//  * Continuous variables are dropped by bounding their term from below: a positive
+//    coefficient needs a finite lower bound, a negative coefficient a finite upper bound.
+//    If neither applies the variable cannot be eliminated and -1 is returned.
+//  * Integer variables are substituted according to transformed_variables_[j]
+//    (-1: shift by the lower bound, +1: complement against the upper bound, 0: unchanged).
+//    If the bound required by the substitution is infinite, -1 is returned, since the
+//    right-hand side would otherwise become infinite or NaN.
+//
+// On success (return value 0) `inequality` has its zero coefficients squeezed out and
+// `inequality_rhs` holds the adjusted right-hand side. On failure both may have been
+// partially modified, so callers must not reuse them.
+// As a side effect transformed_variables_[j] is also set for the continuous variables
+// that were eliminated.
+template <typename i_t, typename f_t>
+i_t strong_cg_cut_t<i_t, f_t>::remove_continuous_variables_integers_nonnegative(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  const std::vector<variable_type_t>& var_types,
+  sparse_vector_t<i_t, f_t>& inequality,
+  f_t& inequality_rhs)
+{
+  const bool verbose = false;
+  // Count the number of continuous variables in the inequality
+  i_t num_continuous = 0;
+  const i_t nz       = inequality.i.size();
+  for (i_t k = 0; k < nz; k++) {
+    const i_t j = inequality.i[k];
+    if (var_types[j] == variable_type_t::CONTINUOUS) { num_continuous++; }
+  }
+
+  if (verbose) { settings.log.printf("num_continuous %d\n", num_continuous); }
+  // We assume the inequality is of the form sum_j a_j x_j <= rhs
+
+  for (i_t k = 0; k < nz; k++) {
+    const i_t j   = inequality.i[k];
+    const f_t l_j = lp.lower[j];
+    const f_t u_j = lp.upper[j];
+    const f_t a_j = inequality.x[k];
+    // An explicit zero contributes nothing, whatever the variable's type or bounds.
+    // Skipping it also avoids forming 0 * inf = NaN below.
+    if (a_j == 0.0) { continue; }
+
+    if (var_types[j] == variable_type_t::CONTINUOUS) {
+      if (a_j > 0.0 && l_j > -inf) {
+        // v_j = x_j - l_j >= 0
+        // x_j = v_j + l_j
+        // sum_{k != j} a_k x_k + a_j x_j <= rhs
+        // sum_{k != j} a_k x_k + a_j (v_j + l_j) <= rhs
+        // sum_{k != j} a_k x_k + a_j v_j <= rhs - a_j l_j
+        inequality_rhs -= a_j * l_j;
+        transformed_variables_[j] = -1;
+
+        // We now have a_j * v_j with a_j, v_j >= 0
+        // So we have sum_{k != j} a_k x_k <= sum_{k != j} a_k x_k + a_j v_j <= rhs - a_j l_j
+        // So we can now drop the continuous variable v_j
+        inequality.x[k] = 0.0;
+
+      } else if (a_j < 0.0 && u_j < inf) {
+        // w_j = u_j - x_j >= 0
+        // x_j = u_j - w_j
+        // sum_{k != j} a_k x_k + a_j x_j <= rhs
+        // sum_{k != j} a_k x_k + a_j (u_j - w_j) <= rhs
+        // sum_{k != j} a_k x_k - a_j w_j <= rhs - a_j u_j
+        inequality_rhs -= a_j * u_j;
+        transformed_variables_[j] = 1;
+
+        // We now have -a_j * w_j with -a_j, w_j >= 0
+        // So we have sum_{k != j} a_k x_k <= sum_{k != j} a_k x_k - a_j w_j <= rhs - a_j u_j
+        // So we can now drop the continuous variable w_j
+        inequality.x[k] = 0.0;
+      } else {
+        // We can't keep the coefficient of the continuous variable positive
+        // This means we can't eliminate the continuous variable
+        if (verbose) { settings.log.printf("x%d ak: %e lo: %e up: %e\n", j, a_j, l_j, u_j); }
+        return -1;
+      }
+    } else {
+      // The variable is integer. We just need to ensure it is nonnegative
+      if (transformed_variables_[j] == -1) {
+        // The substitution needs a finite lower bound; otherwise the rhs would
+        // become infinite and no cut could be derived from it.
+        if (l_j <= -inf) { return -1; }
+        // We are closer to the lower bound.
+        // v_j = x_j - l_j >= 0
+        // x_j = v_j + l_j
+        // sum_{k != j} a_k x_k + a_j x_j <= rhs
+        // sum_{k != j} a_k x_k + a_j (v_j + l_j) <= rhs
+        // sum_{k != j} a_k x_k + a_j v_j <= rhs - a_j l_j
+        inequality_rhs -= a_j * l_j;
+      } else if (transformed_variables_[j] == 1) {
+        // The substitution needs a finite upper bound (see above)
+        if (u_j >= inf) { return -1; }
+        // We are closer to the finite upper bound
+        // w_j = u_j - x_j >= 0
+        // x_j = u_j - w_j
+        // sum_{k != j} a_k x_k + a_j x_j <= rhs
+        // sum_{k != j} a_k x_k + a_j (u_j - w_j) <= rhs
+        // sum_{k != j} a_k x_k - a_j w_j <= rhs - a_j u_j
+        inequality_rhs -= a_j * u_j;
+        inequality.x[k] *= -1.0;
+      }
+    }
+  }
+
+  // Squeeze out the zero coefficients
+  sparse_vector_t<i_t, f_t> new_inequality(inequality.n, 0);
+  inequality.squeeze(new_inequality);
+  inequality = new_inequality;
+  return 0;
+}
+
+// Maps a cut written on the substituted nonnegative variables y_j,
+//   sum_j a_j y_j <= rhs,
+// back onto the original variables x_j, using the substitution recorded in
+// transformed_variables_ for each column of the cut:
+//   -1: y_j = x_j - l_j  =>  the term a_j x_j stays and rhs grows by a_j * l_j
+//   +1: y_j = u_j - x_j  =>  the coefficient flips sign and rhs shrinks by a_j * u_j
+//    0: y_j = x_j, nothing to do
+// Both `cut` and `cut_rhs` are updated in place.
+template <typename i_t, typename f_t>
+void strong_cg_cut_t<i_t, f_t>::to_original_integer_variables(const lp_problem_t<i_t, f_t>& lp,
+                                                              sparse_vector_t<i_t, f_t>& cut,
+                                                              f_t& cut_rhs)
+{
+  const size_t cut_nz = cut.i.size();
+  for (size_t pos = 0; pos < cut_nz; pos++) {
+    const i_t col   = cut.i[pos];
+    const f_t coeff = cut.x[pos];
+    switch (transformed_variables_[col]) {
+      case -1:
+        // a_j (x_j - l_j) <= ...  becomes  a_j x_j <= ... + a_j l_j
+        cut_rhs += coeff * lp.lower[col];
+        break;
+      case 1:
+        // a_j (u_j - x_j) <= ...  becomes  -a_j x_j <= ... - a_j u_j
+        cut_rhs -= coeff * lp.upper[col];
+        cut.x[pos] *= -1.0;
+        break;
+      default:
+        // The variable was never substituted
+        break;
+    }
+  }
+}
+
+// Derives a Chvatal-Gomory cut from sum_j a_j x_j <= rhs, where every x_j is expected
+// to be integer and nonnegative.
+//  * If rhs has a nonzero fractional part the work is delegated to
+//    generate_strong_cg_cut_helper and its status is returned.
+//  * Otherwise the plain rounding cut sum_j floor(a_j) x_j <= floor(rhs) is produced.
+// Returns 0 when `cut` / `cut_rhs` hold a cut and -1 otherwise (for instance when a
+// non-integer variable is encountered; `cut` may then be partially filled).
+// `settings` is not used by this routine.
+template <typename i_t, typename f_t>
+i_t strong_cg_cut_t<i_t, f_t>::generate_strong_cg_cut_integer_only(
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  const std::vector<variable_type_t>& var_types,
+  const sparse_vector_t<i_t, f_t>& inequality,
+  f_t inequality_rhs,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  const size_t num_terms = inequality.i.size();
+
+  // Start from an empty cut with room for every term of the inequality
+  cut.i.clear();
+  cut.x.clear();
+  cut.i.reserve(num_terms);
+  cut.x.reserve(num_terms);
+
+  const f_t rhs_fraction = fractional_part(inequality_rhs);
+  if (rhs_fraction != 0.0) {
+    // Fractional right-hand side: attempt the strengthened cut
+    return generate_strong_cg_cut_helper(
+      inequality.i, inequality.x, inequality_rhs, var_types, cut, cut_rhs);
+  }
+
+  // Integral right-hand side: fall back to the weak CG cut
+  for (size_t pos = 0; pos < num_terms; pos++) {
+    const i_t col = inequality.i[pos];
+    if (var_types[col] != variable_type_t::INTEGER) { return -1; }
+    cut.i.push_back(col);
+    cut.x.push_back(std::floor(inequality.x[pos]));
+  }
+  cut_rhs = std::floor(inequality_rhs);
+  return 0;
+}
+
+// Builds a strong Chvatal-Gomory cut from sum_q coefficients[q] * x_{indicies[q]} <= rhs,
+// where all variables are expected to be integer and nonnegative.
+//
+// Let f(.) denote the fractional part and let k be the unique integer with
+//   1/(k+1) <= f(rhs) < 1/k.
+// A variable with f(a_j) <= f(rhs) receives the coefficient (k+1) * floor(a_j); otherwise
+// it receives (k+1) * floor(a_j) + p', where p' is the smallest integer with
+//   f(a_j) <= f(rhs) + p' * (1 - f(rhs)) / k.
+// The right-hand side of the cut is (k+1) * floor(rhs).
+// Small tolerances are applied to the comparisons; they can only lower a coefficient.
+//
+// Returns 0 on success. Returns -1 when f(rhs) lies outside [0.01, 0.99] (including when
+// it is NaN), when k cannot be bracketed, or when a non-integer variable is encountered;
+// in the last case `cut` may be partially filled.
+template <typename i_t, typename f_t>
+i_t strong_cg_cut_t<i_t, f_t>::generate_strong_cg_cut_helper(
+  const std::vector<i_t>& indicies,
+  const std::vector<f_t>& coefficients,
+  f_t rhs,
+  const std::vector<variable_type_t>& var_types,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  const bool verbose = false;
+  const i_t nz       = indicies.size();
+  const f_t f_a_0    = fractional_part(rhs);
+
+  const f_t min_fractional_part = 1e-2;
+  // Written as a negated range test so that a NaN fractional part (for example from a
+  // non-finite rhs) is rejected here, before it reaches the float-to-integer cast below.
+  if (!(f_a_0 >= min_fractional_part && f_a_0 <= 1 - min_fractional_part)) { return -1; }
+
+  // We will try to generate a strong CG cut.
+  // Find the unique integer k such that
+  // 1/(k+1) <= f(a_0) < 1/k
+  const f_t k_upper = 1.0 / f_a_0;
+  i_t k             = static_cast<i_t>(std::ceil(k_upper)) - 1;
+
+  const f_t alpha = 1.0 - f_a_0;
+  f_t lower       = 1.0 / static_cast<f_t>(k + 1);
+  f_t upper       = 1.0 / static_cast<f_t>(k);
+  if (verbose) { printf("f_a_0 %e lower %e upper %e alpha %e\n", f_a_0, lower, upper, alpha); }
+  if (!(f_a_0 >= lower && f_a_0 < upper)) {
+    if (verbose) { printf("Error: k %d lower %e f(a_0) %e upper %e\n", k, lower, f_a_0, upper); }
+    return -1;
+  }
+
+  cut.i.reserve(nz);
+  cut.x.reserve(nz);
+  cut.i.clear();
+  cut.x.clear();
+  const f_t tol = 1e-4;
+  for (i_t q = 0; q < nz; q++) {
+    const i_t j   = indicies[q];
+    const f_t a_j = coefficients[q];
+    if (var_types[j] != variable_type_t::INTEGER) { return -1; }
+
+    const f_t f_a_j = fractional_part(a_j);
+    // All coefficients are computed in floating point: converting floor(a_j) to i_t
+    // and multiplying by (k + 1) could overflow for large |a_j|.
+    const f_t base = (k + 1.0) * std::floor(a_j);
+    if (f_a_j <= f_a_0 + tol) {
+      cut.i.push_back(j);
+      cut.x.push_back(base);
+      if (verbose) { printf("j %d a_j %e f_a_j %e k %d\n", j, a_j, f_a_j, k); }
+      continue;
+    }
+
+    // Find p such that p <= k * f(a_j) < p + 1
+    const f_t p = std::floor(k * f_a_j);
+    // If f(a_j) > f(a_0) + p / k (1 - f(a_0)) then we can increase the coefficient by 1
+    const f_t rhs_j = f_a_0 + p / static_cast<f_t>(k) * alpha;
+    const f_t coeff = base + p;
+    cut.i.push_back(j);
+    cut.x.push_back(f_a_j > rhs_j + tol ? coeff + 1.0 : coeff);
+  }
+
+  cut_rhs = (k + 1.0) * std::floor(rhs);
+  if (verbose) {
+    printf("Generated strong CG cut: k %d f_a_0 %e cut_rhs %e\n", k, f_a_0, cut_rhs);
+    for (i_t q = 0; q < cut.i.size(); q++) {
+      if (cut.x[q] != 0.0) { printf("%.16e x%d ", cut.x[q], cut.i[q]); }
+    }
+    printf("\n");
+    printf("Original inequality rhs %e nz %zu\n", rhs, coefficients.size());
+    for (i_t q = 0; q < nz; q++) {
+      printf("%e x%d ", coefficients[q], indicies[q]);
+    }
+    printf("\n");
+  }
+  return 0;
+}
+
+// Attempts to derive a strong Chvatal-Gomory cut from `inequality` that cuts off xstar.
+//
+// Steps:
+//   1. Work on a copy of the inequality and try to remove its continuous variables while
+//      making the integer variables nonnegative. If that fails, retry once on the negated
+//      inequality.
+//   2. Generate the CG cut on the transformed variables.
+//   3. Map the cut back to the original variables.
+//   4. Accept the cut only if xstar violates it by more than 1e-6.
+//
+// Returns 0 when a violated cut was found; `cut` / `cut_rhs` are then negated so that the
+// cut reads sum_j a_j x_j >= rhs. Returns -1 otherwise, in which case `cut` / `cut_rhs`
+// may hold intermediate data.
+template <typename i_t, typename f_t>
+i_t strong_cg_cut_t<i_t, f_t>::generate_strong_cg_cut(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  const std::vector<variable_type_t>& var_types,
+  const sparse_vector_t<i_t, f_t>& inequality,
+  const f_t inequality_rhs,
+  const std::vector<f_t>& xstar,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+#ifdef PRINT_INEQUALITY_INFO
+  for (i_t pos = 0; pos < inequality.i.size(); pos++) {
+    const i_t col = inequality.i[pos];
+    printf(
+      "%e %c%d ", inequality.x[pos], var_types[col] == variable_type_t::CONTINUOUS ? 'x' : 'y', col);
+  }
+  printf("CG inequality rhs %e\n", inequality_rhs);
+#endif
+
+  // First attempt: the inequality as given (on a private copy, since the
+  // transformation modifies its argument)
+  sparse_vector_t<i_t, f_t> work_row = inequality;
+  f_t work_rhs                       = inequality_rhs;
+  i_t status =
+    remove_continuous_variables_integers_nonnegative(lp, settings, var_types, work_row, work_rhs);
+
+  if (status != 0) {
+    // Second attempt: the negated equality
+    work_row = inequality;
+    work_row.negate();
+    work_rhs = -inequality_rhs;
+    status =
+      remove_continuous_variables_integers_nonnegative(lp, settings, var_types, work_row, work_rhs);
+  }
+
+  // Continuous variables could not be eliminated either way
+  if (status != 0) { return -1; }
+
+  // The working inequality now contains integer variables only
+  if (generate_strong_cg_cut_integer_only(settings, var_types, work_row, work_rhs, cut, cut_rhs) !=
+      0) {
+    return -1;
+  }
+
+  // Express the cut on the original variables again
+  to_original_integer_variables(lp, cut, cut_rhs);
+
+  // A violated cut satisfies sum_j a_j xstar_j > rhs
+  const f_t activity                = cut.dot(xstar);
+  const f_t excess                  = activity - cut_rhs;
+  const f_t min_violation_threshold = 1e-6;
+  if (excess > min_violation_threshold) {
+    // No slack variables are present at this point because slacks are currently treated
+    // as continuous. Should that change, they may have to be substituted out here.
+
+    // The CG cut reads sum_j a_j x_j <= rhs, while the cut pool expects
+    // sum_j a_j x_j >= rhs, so flip the signs of both sides.
+    cut.negate();
+    cut_rhs *= -1.0;
+    return 0;
+  }
+  return -1;
+}
+
+// Appends the rows of `cuts` (with right-hand sides `cut_rhs`) to the LP and extends the
+// simplex data structures so that the current basis remains usable.
+//
+// For each of the p cut rows a new slack column with bounds [0, inf) and zero objective is
+// appended, so the row becomes cut * x + s = cut_rhs. The new slacks are appended to
+// `new_slacks`, made basic, and the basis factorization is updated via
+// basis_update.append_cuts. `solution`, `vstatus` and `edge_norms` are resized to the new
+// dimensions. `nonbasic_list` is not modified.
+//
+// All inputs are validated before the LP is touched: the cut column indices and the
+// entries of `basic_list` must refer to columns that exist in `lp` on entry.
+//
+// Returns 0 on success and -1 if the inputs are inconsistent (in which case `lp` has not
+// been modified).
+template <typename i_t, typename f_t>
+i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
+             const csr_matrix_t<i_t, f_t>& cuts,
+             const std::vector<f_t>& cut_rhs,
+             lp_problem_t<i_t, f_t>& lp,
+             std::vector<i_t>& new_slacks,
+             lp_solution_t<i_t, f_t>& solution,
+             basis_update_mpf_t<i_t, f_t>& basis_update,
+             std::vector<i_t>& basic_list,
+             std::vector<i_t>& nonbasic_list,
+             std::vector<variable_status_t>& vstatus,
+             std::vector<f_t>& edge_norms)
+{
+  // Given a set of cuts: C*x <= d that are currently violated
+  // by the current solution x* (i.e. C*x* > d), this function
+  // adds the cuts into the LP and solves again.
+
+#ifdef CHECK_BASIS
+  {
+    csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
+    basis_update.multiply_lu(Btest);
+    csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
+    form_b(lp.A, basic_list, B);
+    csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
+    add(Btest, B, 1.0, -1.0, Diff);
+    const f_t err = Diff.norm1();
+    settings.log.printf("Before || B - L*U || %e\n", err);
+    assert(err <= 1e-6);
+  }
+#endif
+
+  const i_t p = cuts.m;
+  if (cut_rhs.size() != static_cast<size_t>(p)) {
+    settings.log.printf("cut_rhs must have the same number of rows as cuts\n");
+    assert(cut_rhs.size() == static_cast<size_t>(p));
+    // Without asserts, stop here rather than read past the end of cut_rhs below
+    return -1;
+  }
+  settings.log.debug("Number of cuts %d\n", p);
+  settings.log.debug("Original lp rows %d\n", lp.num_rows);
+  settings.log.debug("Original lp cols %d\n", lp.num_cols);
+
+  const i_t old_rows = lp.num_rows;
+  const i_t old_cols = lp.num_cols;
+
+  // Validate the cut column indices against the columns that exist before any slack is
+  // added. The indices are later used to index an array of size old_cols.
+  const i_t cuts_nz = cuts.row_start[p];
+  for (i_t q = 0; q < cuts_nz; q++) {
+    const i_t j = cuts.j[q];
+    if (j < 0 || j >= old_cols) {
+      settings.log.printf("Cut column index j=%d exceeds num_cols=%d\n", j, old_cols);
+      return -1;
+    }
+  }
+
+  // Validate the basic list for the same reason
+  const i_t num_basic = static_cast<i_t>(basic_list.size());
+  for (i_t k = 0; k < num_basic; k++) {
+    const i_t j = basic_list[k];
+    if (j < 0 || j >= old_cols) {
+      settings.log.printf(
+        "basic_list[%d] = %d is out of bounds %d old_cols %d\n", k, j, j, old_cols);
+      assert(j >= 0 && j < old_cols);
+      return -1;
+    }
+  }
+
+  // Form [A; C] by appending the cut rows in row-major storage
+  csr_matrix_t<i_t, f_t> new_A_row(old_rows, old_cols, 1);
+  lp.A.to_compressed_row(new_A_row);
+
+  i_t append_status = new_A_row.append_rows(cuts);
+  if (append_status != 0) {
+    settings.log.printf("append_rows error: %d\n", append_status);
+    assert(append_status == 0);
+    return -1;
+  }
+
+  csc_matrix_t<i_t, f_t> new_A_col(old_rows + p, old_cols, 1);
+  new_A_row.to_compressed_col(new_A_col);
+
+  // Add in slacks variables for the new rows
+  lp.lower.resize(old_cols + p);
+  lp.upper.resize(old_cols + p);
+  lp.objective.resize(old_cols + p);
+  edge_norms.resize(old_cols + p);
+  i_t nz = new_A_col.col_start[old_cols];
+  new_A_col.col_start.resize(old_cols + p + 1);
+  new_A_col.i.resize(nz + p);
+  new_A_col.x.resize(nz + p);
+  i_t slack_row = old_rows;
+  for (i_t j = old_cols; j < old_cols + p; j++) {
+    // Each slack column holds a single 1.0 in the row of its cut
+    new_A_col.col_start[j] = nz;
+    new_A_col.i[nz]        = slack_row++;
+    new_A_col.x[nz]        = 1.0;
+    nz++;
+    lp.lower[j]     = 0.0;
+    lp.upper[j]     = inf;
+    lp.objective[j] = 0.0;
+    edge_norms[j]   = 1.0;
+    new_slacks.push_back(j);
+  }
+  settings.log.debug("Done adding slacks\n");
+  new_A_col.col_start[old_cols + p] = nz;
+  new_A_col.n                       = old_cols + p;
+
+  lp.A = new_A_col;
+
+  // Check that all slack columns have length 1
+  for (i_t slack : new_slacks) {
+    const i_t col_start = lp.A.col_start[slack];
+    const i_t col_end   = lp.A.col_start[slack + 1];
+    const i_t col_len   = col_end - col_start;
+    if (col_len != 1) {
+      settings.log.printf("Add cuts: Slack %d has %d nzs in column\n", slack, col_len);
+      assert(col_len == 1);
+    }
+  }
+
+  lp.num_rows += p;
+  lp.num_cols += p;
+
+  lp.rhs.resize(lp.num_rows);
+  for (i_t k = old_rows; k < old_rows + p; k++) {
+    const i_t h = k - old_rows;
+    lp.rhs[k]   = cut_rhs[h];
+  }
+  settings.log.debug("Done adding rhs\n");
+
+  // Construct C_B = C(:, basic_list)
+  // Start by counting the nonzeros of each column of C (indices were validated above)
+  std::vector<i_t> C_col_degree(lp.num_cols, 0);
+  for (i_t q = 0; q < cuts_nz; q++) {
+    C_col_degree[cuts.j[q]]++;
+  }
+  settings.log.debug("Done computing C_col_degree\n");
+
+  // in_basis[j] is the position of column j in the basic list, or -1 if j is nonbasic
+  std::vector<i_t> in_basis(old_cols, -1);
+  i_t C_B_nz = 0;
+  for (i_t k = 0; k < num_basic; k++) {
+    const i_t j = basic_list[k];
+    in_basis[j] = k;
+    // The cuts are on the original variables. So it is possible that
+    // a slack will be basic and thus not part of the cuts matrix
+    if (j < cuts.n) { C_B_nz += C_col_degree[j]; }
+  }
+  settings.log.debug("Done estimating C_B_nz\n");
+
+  csr_matrix_t<i_t, f_t> C_B(p, num_basic, C_B_nz);
+  nz = 0;
+  for (i_t i = 0; i < p; i++) {
+    C_B.row_start[i]    = nz;
+    const i_t row_start = cuts.row_start[i];
+    const i_t row_end   = cuts.row_start[i + 1];
+    for (i_t q = row_start; q < row_end; q++) {
+      const i_t j       = cuts.j[q];
+      const i_t j_basis = in_basis[j];
+      if (j_basis == -1) { continue; }
+      C_B.j[nz] = j_basis;
+      C_B.x[nz] = cuts.x[q];
+      nz++;
+    }
+  }
+  C_B.row_start[p] = nz;
+
+  if (nz != C_B_nz) {
+    settings.log.printf("Add cuts: predicted nz %d actual nz %d\n", C_B_nz, nz);
+    assert(nz == C_B_nz);
+  }
+  settings.log.debug("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz);
+
+  // Adjust the basis update to include the new cuts
+  basis_update.append_cuts(C_B);
+
+  // The new slacks take the basic positions of the new rows
+  basic_list.resize(lp.num_rows, 0);
+  i_t h = old_cols;
+  for (i_t j = old_rows; j < lp.num_rows; j++) {
+    basic_list[j] = h++;
+  }
+
+#ifdef CHECK_BASIS
+  // Check the basis update
+  csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
+  basis_update.multiply_lu(Btest);
+
+  csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
+  form_b(lp.A, basic_list, B);
+
+  csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
+  add(Btest, B, 1.0, -1.0, Diff);
+  const f_t err = Diff.norm1();
+  settings.log.printf("After || B - L*U || %e\n", err);
+  if (err > 1e-6) {
+    settings.log.printf("Diff matrix\n");
+    // Diff.print_matrix();
+    assert(err <= 1e-6);
+  }
+#endif
+  // Adjust the vstatus
+  vstatus.resize(lp.num_cols);
+  for (i_t j = old_cols; j < lp.num_cols; j++) {
+    vstatus[j] = variable_status_t::BASIC;
+  }
+
+  // Adjust the solution
+  solution.x.resize(lp.num_cols, 0.0);
+  solution.y.resize(lp.num_rows, 0.0);
+  solution.z.resize(lp.num_cols, 0.0);
+
+  return 0;
+}
+
+// Removes inactive cut rows, together with their slack columns, from the LP.
+//
+// A row k >= original_rows is removed when its dual value satisfies |y[k]| < 1e-10 and
+// the row contains a slack (a column listed in `new_slacks`) that is basic with value
+// greater than 1e-3. When at least one row is removed, the LP data (A, objective, bounds,
+// rhs), `Arow`, `var_types`, `vstatus`, `edge_norms`, the solution vectors x / y / z,
+// `new_slacks`, `basic_list` and `nonbasic_list` are all compacted to the reduced
+// problem, and the basis is refactored. When nothing qualifies, nothing is modified.
+//
+// `Arow` is expected to be the row-major form of lp.A on entry; it is kept in sync on exit.
+template <typename i_t, typename f_t>
+void remove_cuts(lp_problem_t<i_t, f_t>& lp,
+                 const simplex_solver_settings_t<i_t, f_t>& settings,
+                 csr_matrix_t<i_t, f_t>& Arow,
+                 std::vector<i_t>& new_slacks,
+                 i_t original_rows,
+                 std::vector<variable_type_t>& var_types,
+                 std::vector<variable_status_t>& vstatus,
+                 std::vector<f_t>& edge_norms,
+                 std::vector<f_t>& x,
+                 std::vector<f_t>& y,
+                 std::vector<f_t>& z,
+                 std::vector<i_t>& basic_list,
+                 std::vector<i_t>& nonbasic_list,
+                 basis_update_mpf_t<i_t, f_t>& basis_update)
+{
+  std::vector<i_t> cuts_to_remove;
+  cuts_to_remove.reserve(lp.num_rows - original_rows);
+  std::vector<i_t> slacks_to_remove;
+  slacks_to_remove.reserve(lp.num_rows - original_rows);
+  const f_t dual_tol = 1e-10;
+
+  // Flag the columns that are cut slacks
+  std::vector<i_t> is_slack(lp.num_cols, 0);
+  for (i_t j : new_slacks) {
+    is_slack[j] = 1;
+#ifdef CHECK_SLACKS
+    // Check that slack column length is 1
+    const i_t col_start = lp.A.col_start[j];
+    const i_t col_end   = lp.A.col_start[j + 1];
+    const i_t col_len   = col_end - col_start;
+    if (col_len != 1) {
+      printf("Remove cuts: Slack %d has %d nzs in column\n", j, col_len);
+      assert(col_len == 1);
+    }
+#endif
+  }
+
+  // Select the cut rows with a (numerically) zero dual and a strictly positive basic slack
+  for (i_t k = original_rows; k < lp.num_rows; k++) {
+    if (std::abs(y[k]) < dual_tol) {
+      const i_t row_start = Arow.row_start[k];
+      const i_t row_end   = Arow.row_start[k + 1];
+      i_t last_slack      = -1;
+      const f_t slack_tol = 1e-3;
+      for (i_t p = row_start; p < row_end; p++) {
+        const i_t j = Arow.j[p];
+        if (is_slack[j]) {
+          if (vstatus[j] == variable_status_t::BASIC && x[j] > slack_tol) { last_slack = j; }
+        }
+      }
+      if (last_slack != -1) {
+        cuts_to_remove.push_back(k);
+        slacks_to_remove.push_back(last_slack);
+      }
+    }
+  }
+
+  // Nothing qualifies: leave every argument untouched
+  if (cuts_to_remove.empty()) { return; }
+
+  std::vector<i_t> marked_rows(lp.num_rows, 0);
+  for (i_t i : cuts_to_remove) {
+    marked_rows[i] = 1;
+  }
+  std::vector<i_t> marked_cols(lp.num_cols, 0);
+  for (i_t j : slacks_to_remove) {
+    marked_cols[j] = 1;
+  }
+
+  // Compact the row-indexed data
+  std::vector<f_t> new_rhs(lp.num_rows - cuts_to_remove.size());
+  std::vector<f_t> new_solution_y(lp.num_rows - cuts_to_remove.size());
+  i_t h = 0;
+  for (i_t i = 0; i < lp.num_rows; i++) {
+    if (!marked_rows[i]) {
+      new_rhs[h]        = lp.rhs[i];
+      new_solution_y[h] = y[i];
+      h++;
+    }
+  }
+  csr_matrix_t<i_t, f_t> new_Arow(1, 1, 0);
+  Arow.remove_rows(marked_rows, new_Arow);
+  Arow = new_Arow;
+  Arow.to_compressed_col(lp.A);
+
+  // Compact the column-indexed data, renumbering the surviving columns consecutively
+  std::vector<f_t> new_objective(lp.num_cols - slacks_to_remove.size());
+  std::vector<f_t> new_lower(lp.num_cols - slacks_to_remove.size());
+  std::vector<f_t> new_upper(lp.num_cols - slacks_to_remove.size());
+  std::vector<variable_type_t> new_var_types(lp.num_cols - slacks_to_remove.size());
+  std::vector<variable_status_t> new_vstatus(lp.num_cols - slacks_to_remove.size());
+  std::vector<f_t> new_edge_norms(lp.num_cols - slacks_to_remove.size());
+  std::vector<i_t> new_basic_list;
+  new_basic_list.reserve(lp.num_rows - slacks_to_remove.size());
+  std::vector<i_t> new_nonbasic_list;
+  new_nonbasic_list.reserve(nonbasic_list.size());
+  std::vector<f_t> new_solution_x(lp.num_cols - slacks_to_remove.size());
+  std::vector<f_t> new_solution_z(lp.num_cols - slacks_to_remove.size());
+  std::vector<i_t> new_is_slacks(lp.num_cols - slacks_to_remove.size(), 0);
+  h = 0;
+  for (i_t k = 0; k < lp.num_cols; k++) {
+    if (!marked_cols[k]) {
+      new_objective[h]  = lp.objective[k];
+      new_lower[h]      = lp.lower[k];
+      new_upper[h]      = lp.upper[k];
+      new_var_types[h]  = var_types[k];
+      new_vstatus[h]    = vstatus[k];
+      new_edge_norms[h] = edge_norms[k];
+      new_solution_x[h] = x[k];
+      new_solution_z[h] = z[k];
+      new_is_slacks[h]  = is_slack[k];
+      // The basic / nonbasic lists are rebuilt in terms of the new column numbers
+      if (new_vstatus[h] != variable_status_t::BASIC) {
+        new_nonbasic_list.push_back(h);
+      } else {
+        new_basic_list.push_back(h);
+      }
+      h++;
+    }
+  }
+  lp.A.remove_columns(marked_cols);
+  lp.A.to_compressed_row(Arow);
+  lp.objective = new_objective;
+  lp.lower     = new_lower;
+  lp.upper     = new_upper;
+  lp.rhs       = new_rhs;
+  var_types    = new_var_types;
+  lp.num_cols  = lp.A.n;
+  lp.num_rows  = lp.A.m;
+
+  new_slacks.clear();
+  new_slacks.reserve(lp.num_cols);
+  for (i_t j = 0; j < lp.num_cols; j++) {
+    if (new_is_slacks[j]) { new_slacks.push_back(j); }
+  }
+  basic_list    = new_basic_list;
+  nonbasic_list = new_nonbasic_list;
+  vstatus       = new_vstatus;
+  edge_norms    = new_edge_norms;
+  x             = new_solution_x;
+  y             = new_solution_y;
+  z             = new_solution_z;
+
+  // The count is converted explicitly: size() yields a size_t, which does not match %d
+  settings.log.debug("Removed %d cuts. After removal %d rows %d columns %d nonzeros\n",
+                     static_cast<i_t>(cuts_to_remove.size()),
+                     lp.num_rows,
+                     lp.num_cols,
+                     lp.A.col_start[lp.A.n]);
+
+  // The basis changed shape, so it is rebuilt from scratch
+  basis_update.resize(lp.num_rows);
+  basis_update.refactor_basis(
+    lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus);
+}
+
+// Debugging aid: loads a previously saved solution from the file "solution.dat" in the
+// current working directory.
+//
+// Expected file layout: the number of variables on the first line, followed by one value
+// per variable. The solution is accepted only if the stored variable count equals
+// lp.num_cols and every value can be read. On success `saved_solution` holds the values
+// and a hash, the residual || A*x - b ||_inf and the total bound infeasibility are logged.
+// If the file is malformed `saved_solution` is emptied; if the file cannot be opened
+// `saved_solution` is left unchanged.
+//
+// NOTE(review): the "%d" / "%lf" conversions assume i_t is int and f_t is double, which
+// matches the only instantiation in this file.
+template <typename i_t, typename f_t>
+void read_saved_solution_for_cut_verification(const lp_problem_t<i_t, f_t>& lp,
+                                              const simplex_solver_settings_t<i_t, f_t>& settings,
+                                              std::vector<f_t>& saved_solution)
+{
+  settings.log.printf("Trying to open solution.dat\n");
+  FILE* fid = fopen("solution.dat", "r");
+  if (fid == nullptr) {
+    settings.log.printf("Could not open solution.dat\n");
+    return;
+  }
+
+  // Initialized so that a failed read below does not log an indeterminate value
+  i_t n_solution_dat = 0;
+  i_t count          = fscanf(fid, "%d\n", &n_solution_dat);
+  settings.log.printf(
+    "Solution.dat variables %d =? %d =? count %d\n", n_solution_dat, lp.num_cols, count);
+  bool good = true;
+  if (count == 1 && n_solution_dat == lp.num_cols) {
+    settings.log.printf("Opened solution.dat with %d number of variables\n", n_solution_dat);
+    saved_solution.resize(n_solution_dat);
+    for (i_t j = 0; j < n_solution_dat; j++) {
+      count = fscanf(fid, "%lf", &saved_solution[j]);
+      if (count != 1) {
+        settings.log.printf("bad read solution.dat: j %d count %d\n", j, count);
+        good = false;
+        break;
+      }
+    }
+  } else {
+    good = false;
+  }
+  fclose(fid);
+
+  if (!good) {
+    saved_solution.resize(0);
+    settings.log.printf("Solution.dat is bad\n");
+    return;
+  }
+
+  settings.log.printf("Read solution file\n");
+
+  auto hash_combine_f = [](size_t seed, f_t x) {
+    seed ^= std::hash<f_t>{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    return seed;
+  };
+  size_t seed = lp.num_cols;
+  for (i_t j = 0; j < lp.num_cols; ++j) {
+    seed = hash_combine_f(seed, saved_solution[j]);
+  }
+  // %x expects an unsigned int, so the hash is narrowed explicitly to its low bits
+  settings.log.printf("Saved solution hash: %20x\n", static_cast<unsigned int>(seed));
+
+  // Compute || A * x - b ||_inf
+  std::vector<f_t> residual = lp.rhs;
+  matrix_vector_multiply(lp.A, 1.0, saved_solution, -1.0, residual);
+  settings.log.printf("Saved solution: || A*x - b ||_inf %e\n",
+                      vector_norm_inf<i_t, f_t>(residual));
+
+  // Accumulate the bound violations that exceed a tolerance of 1e-6
+  f_t infeas = 0;
+  for (i_t j = 0; j < lp.num_cols; j++) {
+    if (saved_solution[j] < lp.lower[j] - 1e-6) {
+      f_t curr_infeas = (lp.lower[j] - saved_solution[j]);
+      infeas += curr_infeas;
+      settings.log.printf("j: %d saved solution %e lower %e\n", j, saved_solution[j], lp.lower[j]);
+    }
+    if (saved_solution[j] > lp.upper[j] + 1e-6) {
+      f_t curr_infeas = (saved_solution[j] - lp.upper[j]);
+      infeas += curr_infeas;
+      settings.log.printf("j %d saved solution %e upper %e\n", j, saved_solution[j], lp.upper[j]);
+    }
+  }
+  settings.log.printf("Bound infeasibility %e\n", infeas);
+}
+
+// Debugging aid: writes the first lp.num_cols entries of `solution` to the file
+// "solution.dat" in the current working directory.
+//
+// File layout: the number of variables on the first line, followed by one value per line
+// printed with 17 significant digits. The residual || A*x - b ||_inf and a hash of the
+// written values are printed to stdout. Nothing happens if the file cannot be opened.
+//
+// NOTE(review): `solution` is assumed to hold at least lp.num_cols entries - confirm
+// against callers.
+template <typename i_t, typename f_t>
+void write_solution_for_cut_verification(const lp_problem_t<i_t, f_t>& lp,
+                                         const std::vector<f_t>& solution)
+{
+  FILE* fid = fopen("solution.dat", "w");
+  if (fid == nullptr) { return; }
+
+  printf("Writing solution.dat\n");
+
+  std::vector<f_t> residual = lp.rhs;
+  matrix_vector_multiply(lp.A, 1.0, solution, -1.0, residual);
+  printf("|| A*x - b ||_inf %e\n", vector_norm_inf<i_t, f_t>(residual));
+  auto hash_combine_f = [](size_t seed, f_t x) {
+    seed ^= std::hash<f_t>{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    return seed;
+  };
+  // size() yields a size_t, which is printed with %zu
+  printf("incumbent size %zu original lp cols %d\n", solution.size(), lp.num_cols);
+  i_t n       = lp.num_cols;
+  size_t seed = n;
+  fprintf(fid, "%d\n", n);
+  for (i_t j = 0; j < n; ++j) {
+    fprintf(fid, "%.17g\n", solution[j]);
+    seed = hash_combine_f(seed, solution[j]);
+  }
+  // %x expects an unsigned int, so the hash is narrowed explicitly to its low bits
+  printf("Solution hash: %20x\n", static_cast<unsigned int>(seed));
+  fclose(fid);
+}
+
+// Debugging aid: evaluates every cut row at a saved solution and reports to stdout each
+// row k whose activity exceeds cut_rhs[k] by more than 1e-6.
+// Does nothing when `saved_solution` is empty.
+template <typename i_t, typename f_t>
+void verify_cuts_against_saved_solution(const csr_matrix_t<i_t, f_t>& cuts,
+                                        const std::vector<f_t>& cut_rhs,
+                                        const std::vector<f_t>& saved_solution)
+{
+  // No reference solution available
+  if (!(saved_solution.size() > 0)) { return; }
+
+  // The matrix-vector product operates on column-major storage
+  csc_matrix_t<i_t, f_t> cuts_by_column(cuts.m, cuts.n, cuts.row_start[cuts.m]);
+  cuts.to_compressed_col(cuts_by_column);
+
+  // activity = cuts * saved_solution
+  std::vector<f_t> activity(cuts.m);
+  matrix_vector_multiply(cuts_by_column, 1.0, saved_solution, 0.0, activity);
+
+  const i_t num_cuts = cuts.m;
+  for (i_t row = 0; row < num_cuts; row++) {
+    if (activity[row] > cut_rhs[row] + 1e-6) {
+      printf("Cut %d is violated by saved solution. Cx %e cut_rhs %e Diff: %e\n",
+             row,
+             activity[row],
+             cut_rhs[row],
+             activity[row] - cut_rhs[row]);
+    }
+  }
+}
+
+// Explicit instantiations for i_t = int, f_t = double. They are emitted only when
+// DUAL_SIMPLEX_INSTANTIATE_DOUBLE is defined.
+#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
+// Class templates
+template class cut_pool_t<int, double>;
+template class cut_generation_t<int, double>;
+template class tableau_equality_t<int, double>;
+template class mixed_integer_rounding_cut_t<int, double>;
+
+// Free function templates defined in this file
+template int add_cuts(const simplex_solver_settings_t<int, double>& settings,
+                      const csr_matrix_t<int, double>& cuts,
+                      const std::vector<double>& cut_rhs,
+                      lp_problem_t<int, double>& lp,
+                      std::vector<int>& new_slacks,
+                      lp_solution_t<int, double>& solution,
+                      basis_update_mpf_t<int, double>& basis_update,
+                      std::vector<int>& basic_list,
+                      std::vector<int>& nonbasic_list,
+                      std::vector<variable_status_t>& vstatus,
+                      std::vector<double>& edge_norms);
+
+template void remove_cuts<int, double>(lp_problem_t<int, double>& lp,
+                                       const simplex_solver_settings_t<int, double>& settings,
+                                       csr_matrix_t<int, double>& Arow,
+                                       std::vector<int>& new_slacks,
+                                       int original_rows,
+                                       std::vector<variable_type_t>& var_types,
+                                       std::vector<variable_status_t>& vstatus,
+                                       std::vector<double>& edge_norms,
+                                       std::vector<double>& x,
+                                       std::vector<double>& y,
+                                       std::vector<double>& z,
+                                       std::vector<int>& basic_list,
+                                       std::vector<int>& nonbasic_list,
+                                       basis_update_mpf_t<int, double>& basis_update);
+
+// Debugging helpers that read, write and check a saved solution
+template void read_saved_solution_for_cut_verification<int, double>(
+  const lp_problem_t<int, double>& lp,
+  const simplex_solver_settings_t<int, double>& settings,
+  std::vector<double>& saved_solution);
+
+template void write_solution_for_cut_verification<int, double>(const lp_problem_t<int, double>& lp,
+                                                               const std::vector<double>& solution);
+
+template void verify_cuts_against_saved_solution<int, double>(
+  const csr_matrix_t<int, double>& cuts,
+  const std::vector<double>& cut_rhs,
+  const std::vector<double>& saved_solution);
+#endif
+
+}  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp
new file mode 100644
index 0000000000..a4a36d75b2
--- /dev/null
+++ b/cpp/src/dual_simplex/cuts.hpp
@@ -0,0 +1,479 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+#pragma once
+
+#include <dual_simplex/basis_updates.hpp>
+#include <dual_simplex/presolve.hpp>
+#include <dual_simplex/simplex_solver_settings.hpp>
+#include <dual_simplex/sparse_vector.hpp>
+#include <dual_simplex/types.hpp>
+#include <dual_simplex/user_problem.hpp>
+
+#include <algorithm>
+#include <array>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include <cmath>
+#include <cstdint>
+
+namespace cuopt::linear_programming::dual_simplex {
+
+enum cut_type_t : int8_t {
+  MIXED_INTEGER_GOMORY   = 0,  // values double as indices into cut_info_t::num_cuts
+  MIXED_INTEGER_ROUNDING = 1,  // and cut_info_t::cut_type_names
+  KNAPSACK               = 2,
+  CHVATAL_GOMORY         = 3,
+  MAX_CUT_TYPE           = 4  // count of cut types; sizes the per-type arrays
+};
+
+template <typename i_t, typename f_t>
+struct cut_info_t {
+  bool has_cuts() const
+  {
+    // Add up the per-type counters; a positive total means at least one cut
+    // of some type has been recorded.
+    const i_t recorded =
+      std::accumulate(num_cuts.begin(), num_cuts.end(), static_cast<i_t>(0));
+    return recorded > 0;
+  }
+  void record_cut_types(const std::vector<cut_type_t>& cut_types)
+  {
+    for (const cut_type_t recorded_type : cut_types) {
+      ++num_cuts[static_cast<int>(recorded_type)];  // bump the counter of that cut type
+    }
+  }
+  const char* cut_type_names[MAX_CUT_TYPE] = {"Gomory   ", "MIR      ", "Knapsack ", "Strong CG"};
+  std::array<i_t, MAX_CUT_TYPE> num_cuts   = {0};
+};
+
+template <typename i_t, typename f_t>
+void print_cut_info(const simplex_solver_settings_t<i_t, f_t>& settings,
+                    const cut_info_t<i_t, f_t>& cut_info)
+{
+  // Nothing is logged unless at least one cut has been recorded.
+  if (!cut_info.has_cuts()) { return; }
+  for (i_t k = 0; k != MAX_CUT_TYPE; ++k) {
+    settings.log.printf("%s cuts : %d\n", cut_info.cut_type_names[k], cut_info.num_cuts[k]);
+  }
+}
+
+template <typename i_t, typename f_t>
+void print_cut_types(const std::string& prefix,
+                     const std::vector<cut_type_t>& cut_types,
+                     const simplex_solver_settings_t<i_t, f_t>& settings)
+{
+  cut_info_t<i_t, f_t> tally;  // per-type histogram of cut_types
+  tally.record_cut_types(cut_types);
+  settings.log.printf("%s: ", prefix.c_str());
+  for (i_t k = 0; k != MAX_CUT_TYPE; ++k) {
+    settings.log.printf("%s cuts: %d ", tally.cut_type_names[k], tally.num_cuts[k]);
+    if (k + 1 != MAX_CUT_TYPE) { settings.log.printf(", "); }  // not after the last entry
+  }
+  settings.log.printf("\n");
+}
+
+template <typename f_t>
+f_t fractional_part(f_t a)
+{
+  return a - std::floor(a);  // distance above floor(a): 2.25 -> 0.25, -0.25 -> 0.75
+}
+
+// Fills permutation with the indices of scores, ordered so the highest score comes first
+template <typename i_t, typename f_t>
+void best_score_first_permutation(const std::vector<f_t>& scores, std::vector<i_t>& permutation)
+{
+  permutation.resize(scores.size());                     // one index per score
+  std::iota(permutation.begin(), permutation.end(), 0);  // start from 0, 1, 2, ...
+  std::sort(  // descending score; ties in unspecified order
+    permutation.begin(), permutation.end(), [&](i_t a, i_t b) { return scores[a] > scores[b]; });
+}
+
+// Fills permutation with the indices of scores, ordered so the highest score comes last
+template <typename i_t, typename f_t>
+void best_score_last_permutation(const std::vector<f_t>& scores, std::vector<i_t>& permutation)
+{
+  permutation.resize(scores.size());                     // one index per score
+  std::iota(permutation.begin(), permutation.end(), 0);  // start from 0, 1, 2, ...
+  std::sort(  // ascending score; ties in unspecified order
+    permutation.begin(), permutation.end(), [&](i_t a, i_t b) { return scores[a] < scores[b]; });
+}
+
+// Routines for verifying cuts against a saved solution
+template <typename i_t, typename f_t>
+void read_saved_solution_for_cut_verification(const lp_problem_t<i_t, f_t>& lp,
+                                              const simplex_solver_settings_t<i_t, f_t>& settings,
+                                              std::vector<f_t>& saved_solution);
+
+template <typename i_t, typename f_t>
+void write_solution_for_cut_verification(const lp_problem_t<i_t, f_t>& lp,
+                                         const std::vector<f_t>& solution);
+
+template <typename i_t, typename f_t>
+void verify_cuts_against_saved_solution(const csr_matrix_t<i_t, f_t>& cuts,
+                                        const std::vector<f_t>& cut_rhs,
+                                        const std::vector<f_t>& saved_solution);
+
+template <typename i_t, typename f_t>
+class cut_pool_t {  // stores candidate cuts plus their rhs, age and type
+ public:
+  cut_pool_t(i_t original_vars, const simplex_solver_settings_t<i_t, f_t>& settings)
+    : original_vars_(original_vars),
+      settings_(settings),  // kept by reference: settings must outlive the pool
+      cut_storage_(0, original_vars, 0),
+      rhs_storage_(0),
+      cut_age_(0),
+      cut_type_(0),
+      scored_cuts_(0)
+  {
+  }
+
+  // Add a cut in the form: cut'*x >= rhs.
+  // We expect that the cut is violated by the current relaxation xstar, i.e.
+  // cut'*xstar < rhs
+  void add_cut(cut_type_t cut_type, const sparse_vector_t<i_t, f_t>& cut, f_t rhs);
+
+  void score_cuts(std::vector<f_t>& x_relax);
+
+  // We return the cuts in the form best_cuts*x <= best_rhs
+  i_t get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts,
+                    std::vector<f_t>& best_rhs,
+                    std::vector<cut_type_t>& best_cut_types);
+
+  void age_cuts();
+
+  void drop_cuts();
+
+  i_t pool_size() const { return cut_storage_.m; }  // presumably one row per cut (confirm)
+
+  void print_cutpool_types() { print_cut_types("In cut pool", cut_type_, settings_); }
+
+ private:
+  f_t cut_distance(i_t row, const std::vector<f_t>& x, f_t& cut_violation, f_t& cut_norm);
+  f_t cut_density(i_t row);
+  f_t cut_orthogonality(i_t i, i_t j);
+
+  i_t original_vars_;
+  const simplex_solver_settings_t<i_t, f_t>& settings_;  // not owned
+
+  csr_matrix_t<i_t, f_t> cut_storage_;
+  std::vector<f_t> rhs_storage_;
+  std::vector<i_t> cut_age_;
+  std::vector<cut_type_t> cut_type_;
+
+  i_t scored_cuts_;
+  std::vector<f_t> cut_distances_;
+  std::vector<f_t> cut_norms_;
+  std::vector<f_t> cut_orthogonality_;
+  std::vector<f_t> cut_scores_;
+  std::vector<i_t> best_cuts_;
+};
+
+template <typename i_t, typename f_t>
+class knapsack_generation_t {  // knapsack cut generation over recorded knapsack rows
+ public:
+  knapsack_generation_t(const lp_problem_t<i_t, f_t>& lp,
+                        const simplex_solver_settings_t<i_t, f_t>& settings,
+                        csr_matrix_t<i_t, f_t>& Arow,
+                        const std::vector<i_t>& new_slacks,
+                        const std::vector<variable_type_t>& var_types);
+
+  i_t generate_knapsack_cuts(const lp_problem_t<i_t, f_t>& lp,
+                             const simplex_solver_settings_t<i_t, f_t>& settings,
+                             csr_matrix_t<i_t, f_t>& Arow,
+                             const std::vector<i_t>& new_slacks,
+                             const std::vector<variable_type_t>& var_types,
+                             const std::vector<f_t>& xstar,
+                             i_t knapsack_row,
+                             sparse_vector_t<i_t, f_t>& cut,
+                             f_t& cut_rhs);
+
+  i_t num_knapsack_constraints() const { return static_cast<i_t>(knapsack_constraints_.size()); }
+  const std::vector<i_t>& get_knapsack_constraints() const { return knapsack_constraints_; }
+
+ private:
+  // Generate a heuristic solution to the 0-1 knapsack problem
+  f_t greedy_knapsack_problem(const std::vector<f_t>& values,
+                              const std::vector<f_t>& weights,
+                              f_t rhs,
+                              std::vector<f_t>& solution);
+
+  // Solve a 0-1 knapsack problem using dynamic programming
+  f_t solve_knapsack_problem(const std::vector<f_t>& values,
+                             const std::vector<f_t>& weights,
+                             f_t rhs,
+                             std::vector<f_t>& solution);
+
+  std::vector<i_t> is_slack_;
+  std::vector<i_t> knapsack_constraints_;
+  const simplex_solver_settings_t<i_t, f_t>& settings_;  // not owned; must outlive this object
+};
+
+// Forward declaration
+template <typename i_t, typename f_t>
+class mixed_integer_rounding_cut_t;
+
+template <typename i_t, typename f_t>
+class cut_generation_t {  // Gomory/MIR/knapsack cut generators bound to a cut_pool_t
+ public:
+  cut_generation_t(cut_pool_t<i_t, f_t>& cut_pool,
+                   const lp_problem_t<i_t, f_t>& lp,
+                   const simplex_solver_settings_t<i_t, f_t>& settings,
+                   csr_matrix_t<i_t, f_t>& Arow,
+                   const std::vector<i_t>& new_slacks,
+                   const std::vector<variable_type_t>& var_types)
+    : cut_pool_(cut_pool), knapsack_generation_(lp, settings, Arow, new_slacks, var_types)
+  {  // no body: both members are set up by the initializer list
+  }
+
+  void generate_cuts(const lp_problem_t<i_t, f_t>& lp,
+                     const simplex_solver_settings_t<i_t, f_t>& settings,
+                     csr_matrix_t<i_t, f_t>& Arow,
+                     const std::vector<i_t>& new_slacks,
+                     const std::vector<variable_type_t>& var_types,
+                     basis_update_mpf_t<i_t, f_t>& basis_update,
+                     const std::vector<f_t>& xstar,
+                     const std::vector<i_t>& basic_list,
+                     const std::vector<i_t>& nonbasic_list);
+
+ private:
+  // Generate all mixed integer gomory cuts
+  void generate_gomory_cuts(const lp_problem_t<i_t, f_t>& lp,
+                            const simplex_solver_settings_t<i_t, f_t>& settings,
+                            csr_matrix_t<i_t, f_t>& Arow,
+                            const std::vector<i_t>& new_slacks,
+                            const std::vector<variable_type_t>& var_types,
+                            basis_update_mpf_t<i_t, f_t>& basis_update,
+                            const std::vector<f_t>& xstar,
+                            const std::vector<i_t>& basic_list,
+                            const std::vector<i_t>& nonbasic_list);
+
+  // Generate all mixed integer rounding cuts
+  void generate_mir_cuts(const lp_problem_t<i_t, f_t>& lp,
+                         const simplex_solver_settings_t<i_t, f_t>& settings,
+                         csr_matrix_t<i_t, f_t>& Arow,
+                         const std::vector<i_t>& new_slacks,
+                         const std::vector<variable_type_t>& var_types,
+                         const std::vector<f_t>& xstar);
+
+  // Generate all knapsack cuts
+  void generate_knapsack_cuts(const lp_problem_t<i_t, f_t>& lp,
+                              const simplex_solver_settings_t<i_t, f_t>& settings,
+                              csr_matrix_t<i_t, f_t>& Arow,
+                              const std::vector<i_t>& new_slacks,
+                              const std::vector<variable_type_t>& var_types,
+                              const std::vector<f_t>& xstar);
+
+  cut_pool_t<i_t, f_t>& cut_pool_;  // reference, not owned: the pool must outlive this object
+  knapsack_generation_t<i_t, f_t> knapsack_generation_;
+};
+
+template <typename i_t, typename f_t>
+class tableau_equality_t {  // builds base equalities; owns the scratch vectors for that
+ public:
+  tableau_equality_t(const lp_problem_t<i_t, f_t>& lp,
+                     basis_update_mpf_t<i_t, f_t>& basis_update,
+                     const std::vector<i_t>& nonbasic_list)
+    : b_bar_(lp.num_rows, 0.0),
+      nonbasic_mark_(lp.num_cols, 0),
+      x_workspace_(lp.num_cols, 0.0),
+      x_mark_(lp.num_cols, 0),
+      c_workspace_(lp.num_cols, 0.0)
+  {
+    basis_update.b_solve(lp.rhs, b_bar_);  // presumably b_bar = B^{-1} * rhs (TODO confirm)
+    for (i_t j : nonbasic_list) {
+      nonbasic_mark_[j] = 1;  // flag column j as nonbasic
+    }
+  }
+
+  // Generates the base equalities: C*x == d that will be turned into cuts
+  i_t generate_base_equality(const lp_problem_t<i_t, f_t>& lp,
+                             const simplex_solver_settings_t<i_t, f_t>& settings,
+                             csr_matrix_t<i_t, f_t>& Arow,
+                             const std::vector<variable_type_t>& var_types,
+                             basis_update_mpf_t<i_t, f_t>& basis_update,
+                             const std::vector<f_t>& xstar,
+                             const std::vector<i_t>& basic_list,
+                             const std::vector<i_t>& nonbasic_list,
+                             i_t i,
+                             sparse_vector_t<i_t, f_t>& inequality,
+                             f_t& inequality_rhs);
+
+ private:
+  std::vector<f_t> b_bar_;          // length lp.num_rows, filled by b_solve in the constructor
+  std::vector<i_t> nonbasic_mark_;  // length lp.num_cols, 1 for columns in nonbasic_list
+  std::vector<f_t> x_workspace_;
+  std::vector<i_t> x_mark_;
+  std::vector<f_t> c_workspace_;
+};
+
+template <typename i_t, typename f_t>
+class mixed_integer_rounding_cut_t {  // helpers for building MIR cuts from an inequality
+ public:
+  mixed_integer_rounding_cut_t(const lp_problem_t<i_t, f_t>& lp,
+                               const simplex_solver_settings_t<i_t, f_t>& settings,
+                               const std::vector<i_t>& new_slacks,
+                               const std::vector<f_t>& xstar);
+
+  // Convert an inequality of the form: sum_j a_j x_j >= beta
+  // with l_j <= x_j <= u_j into the form:
+  // sum_{j not in L union U} d_j x_j + sum_{j in L} d_j v_j
+  // + sum_{j in U} d_j w_j >= delta,
+  // where v_j = x_j - l_j for j in L
+  // and   w_j = u_j - x_j for j in U
+  void to_nonnegative(const lp_problem_t<i_t, f_t>& lp,
+                      sparse_vector_t<i_t, f_t>& inequality,
+                      f_t& rhs);
+
+  void relaxation_to_nonnegative(const lp_problem_t<i_t, f_t>& lp,
+                                 const std::vector<f_t>& xstar,
+                                 std::vector<f_t>& xstar_nonnegative);
+
+  // Convert an inequality of the form:
+  // sum_{j not in L union U} d_j x_j + sum_{j in L} d_j v_j
+  // + sum_{j in U} d_j w_j >= delta
+  // where v_j = x_j - l_j for j in L
+  // and   w_j = u_j - x_j for j in U
+  // back to an inequality on the original variables
+  // sum_j a_j x_j >= beta
+  void to_original(const lp_problem_t<i_t, f_t>& lp,
+                   sparse_vector_t<i_t, f_t>& inequality,
+                   f_t& rhs);
+
+  // Given a cut of the form sum_j d_j x_j >= beta
+  // with l_j <= x_j <= u_j, try to remove coefficients d_j
+  // with | d_j | < epsilon
+  void remove_small_coefficients(const std::vector<f_t>& lower_bounds,
+                                 const std::vector<f_t>& upper_bounds,
+                                 sparse_vector_t<i_t, f_t>& cut,
+                                 f_t& cut_rhs);
+
+  // Given an inequality sum_j a_j x_j >= beta, x_j >= 0, x_j in Z, j in I
+  // generate an MIR cut of the form sum_j d_j x_j >= delta
+  i_t generate_cut_nonnegative(const sparse_vector_t<i_t, f_t>& a,
+                               f_t beta,
+                               const std::vector<variable_type_t>& var_types,
+                               sparse_vector_t<i_t, f_t>& cut,
+                               f_t& cut_rhs);
+
+  f_t compute_violation(const sparse_vector_t<i_t, f_t>& cut,
+                        f_t cut_rhs,
+                        const std::vector<f_t>& xstar);
+
+  i_t generate_cut(const sparse_vector_t<i_t, f_t>& a,
+                   f_t beta,
+                   const std::vector<f_t>& upper_bounds,  // NOTE(review): upper before lower,
+                   const std::vector<f_t>& lower_bounds,  // reverse of remove_small_coefficients
+                   const std::vector<variable_type_t>& var_types,
+                   sparse_vector_t<i_t, f_t>& cut,
+                   f_t& cut_rhs);
+
+  void substitute_slacks(const lp_problem_t<i_t, f_t>& lp,
+                         csr_matrix_t<i_t, f_t>& Arow,
+                         sparse_vector_t<i_t, f_t>& cut,
+                         f_t& cut_rhs);
+
+  // Combine the pivot row with the inequality to eliminate the variable j
+  // The new inequality is returned in inequality and inequality_rhs
+  void combine_rows(const lp_problem_t<i_t, f_t>& lp,
+                    csr_matrix_t<i_t, f_t>& Arow,
+                    i_t j,
+                    const sparse_vector_t<i_t, f_t>& pivot_row,
+                    f_t pivot_row_rhs,
+                    sparse_vector_t<i_t, f_t>& inequality,
+                    f_t& inequality_rhs);
+
+ private:
+  i_t num_vars_;
+  const simplex_solver_settings_t<i_t, f_t>& settings_;  // not owned; must outlive this object
+  std::vector<f_t> x_workspace_;
+  std::vector<i_t> x_mark_;
+  std::vector<i_t> has_lower_;
+  std::vector<i_t> has_upper_;
+  std::vector<i_t> is_slack_;
+  std::vector<i_t> slack_rows_;
+  std::vector<i_t> indices_;
+  std::vector<i_t> bound_info_;
+  bool needs_complement_;
+};
+
+template <typename i_t, typename f_t>
+class strong_cg_cut_t {  // strong Chvatal-Gomory (CG) cut generation helpers
+ public:
+  strong_cg_cut_t(const lp_problem_t<i_t, f_t>& lp,
+                  const std::vector<variable_type_t>& var_types,
+                  const std::vector<f_t>& xstar);
+
+  i_t generate_strong_cg_cut(const lp_problem_t<i_t, f_t>& lp,
+                             const simplex_solver_settings_t<i_t, f_t>& settings,
+                             const std::vector<variable_type_t>& var_types,
+                             const sparse_vector_t<i_t, f_t>& inequality,
+                             const f_t inequality_rhs,  // by value; const is invisible to callers
+                             const std::vector<f_t>& xstar,
+                             sparse_vector_t<i_t, f_t>& cut,
+                             f_t& cut_rhs);
+
+  i_t remove_continuous_variables_integers_nonnegative(
+    const lp_problem_t<i_t, f_t>& lp,
+    const simplex_solver_settings_t<i_t, f_t>& settings,
+    const std::vector<variable_type_t>& var_types,
+    sparse_vector_t<i_t, f_t>& inequality,
+    f_t& inequality_rhs);
+
+  void to_original_integer_variables(const lp_problem_t<i_t, f_t>& lp,
+                                     sparse_vector_t<i_t, f_t>& cut,
+                                     f_t& cut_rhs);
+
+  i_t generate_strong_cg_cut_integer_only(const simplex_solver_settings_t<i_t, f_t>& settings,
+                                          const std::vector<variable_type_t>& var_types,
+                                          const sparse_vector_t<i_t, f_t>& inequality,
+                                          f_t inequality_rhs,
+                                          sparse_vector_t<i_t, f_t>& cut,
+                                          f_t& cut_rhs);
+
+ private:
+  i_t generate_strong_cg_cut_helper(const std::vector<i_t>& indicies,  // sic (indices)
+                                    const std::vector<f_t>& coefficients,
+                                    f_t rhs,
+                                    const std::vector<variable_type_t>& var_types,
+                                    sparse_vector_t<i_t, f_t>& cut,
+                                    f_t& cut_rhs);
+
+  std::vector<i_t> transformed_variables_;
+};
+
+template <typename i_t, typename f_t>
+i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
+             const csr_matrix_t<i_t, f_t>& cuts,
+             const std::vector<f_t>& cut_rhs,
+             lp_problem_t<i_t, f_t>& lp,
+             std::vector<i_t>& new_slacks,
+             lp_solution_t<i_t, f_t>& solution,
+             basis_update_mpf_t<i_t, f_t>& basis_update,
+             std::vector<i_t>& basic_list,
+             std::vector<i_t>& nonbasic_list,
+             std::vector<variable_status_t>& vstatus,
+             std::vector<f_t>& edge_norms);
+
+template <typename i_t, typename f_t>
+void remove_cuts(lp_problem_t<i_t, f_t>& lp,
+                 const simplex_solver_settings_t<i_t, f_t>& settings,
+                 csr_matrix_t<i_t, f_t>& Arow,
+                 std::vector<i_t>& new_slacks,
+                 i_t original_rows,
+                 std::vector<variable_type_t>& var_types,
+                 std::vector<variable_status_t>& vstatus,
+                 std::vector<f_t>& edge_norms,
+                 std::vector<f_t>& x,
+                 std::vector<f_t>& y,
+                 std::vector<f_t>& z,
+                 std::vector<i_t>& basic_list,
+                 std::vector<i_t>& nonbasic_list,
+                 basis_update_mpf_t<i_t, f_t>& basis_update);
+
+}  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/dual_simplex/dense_matrix.hpp b/cpp/src/dual_simplex/dense_matrix.hpp
index b1fc521b32..7f3f08813c 100644
--- a/cpp/src/dual_simplex/dense_matrix.hpp
+++ b/cpp/src/dual_simplex/dense_matrix.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -18,6 +18,8 @@ class dense_matrix_t {
  public:
   dense_matrix_t(i_t rows, i_t cols) : m(rows), n(cols), values(rows * cols, 0.0) {}
 
+  dense_matrix_t(i_t rows, i_t cols, f_t value) : m(rows), n(cols), values(rows * cols, value) {}
+
   void resize(i_t rows, i_t cols)
   {
     m = rows;
diff --git a/cpp/src/dual_simplex/folding.cpp b/cpp/src/dual_simplex/folding.cpp
index 6eae5eb1ed..c59d827c51 100644
--- a/cpp/src/dual_simplex/folding.cpp
+++ b/cpp/src/dual_simplex/folding.cpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -386,16 +386,18 @@ void color_lower_bounds(const csc_matrix_t<i_t, f_t>& A,
   col_lower_bound = static_cast<i_t>(unique_col_sums.size());
 }
 
+enum coloring_status_t : int8_t { COLORING_SUCCESS = 0, COLORING_FAILED = -1 };
+
 template <typename i_t, typename f_t>
-i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
-                const simplex_solver_settings_t<i_t, f_t>& settings,
-                std::vector<color_t<i_t>>& colors,
-                i_t row_threshold,
-                i_t col_threshold,
-                i_t& num_row_colors,
-                i_t& num_col_colors,
-                i_t& num_colors,
-                i_t& total_colors_seen)
+coloring_status_t color_graph(const csc_matrix_t<i_t, f_t>& A,
+                              const simplex_solver_settings_t<i_t, f_t>& settings,
+                              std::vector<color_t<i_t>>& colors,
+                              i_t row_threshold,
+                              i_t col_threshold,
+                              i_t& num_row_colors,
+                              i_t& num_col_colors,
+                              i_t& num_colors,
+                              i_t& total_colors_seen)
 {
   f_t start_time    = tic();
   f_t last_log_time = start_time;
@@ -421,7 +423,7 @@ i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
       row_threshold,
       col_lower_bound,
       col_threshold);
-    return -1;
+    return coloring_status_t::COLORING_FAILED;
   }
 
   std::vector<i_t> all_rows_vertices(m);
@@ -577,7 +579,7 @@ i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
     for (i_t k = 0; k < max_vertices; k++) {
       if (vertex_to_sum[k] != 0.0) {
         settings.log.printf("Folding: Vertex %d has sum %e\n", k, vertex_to_sum[k]);
-        return -2;
+        return coloring_status_t::COLORING_FAILED;
       }
     }
 #endif
@@ -594,7 +596,7 @@ i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
         settings.log.printf("Folding: Color %d has %ld vertices to refine. Not cleared\n",
                             k,
                             vertices_to_refine_by_color[k].size());
-        return -2;
+        return coloring_status_t::COLORING_FAILED;
       }
     }
 #endif
@@ -604,7 +606,7 @@ i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
       if (row_color_map[i] >= total_colors_seen) {
         settings.log.printf("Folding: Row color %d is not in the colors vector\n",
                             row_color_map[i]);
-        return -2;
+        return coloring_status_t::COLORING_FAILED;
       }
     }
     for (i_t j = 0; j < n; j++) {
@@ -612,7 +614,7 @@ i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
         settings.log.printf("Folding: Column color %d is not in the colors vector. %d\n",
                             col_color_map[j],
                             num_colors);
-        return -2;
+        return coloring_status_t::COLORING_FAILED;
       }
     }
 #endif
@@ -634,7 +636,7 @@ i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
                 row_color_map[v],
                 color.color,
                 v);
-              return -2;
+              return coloring_status_t::COLORING_FAILED;
             }
           }
         } else {
@@ -646,7 +648,7 @@ i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
                 col_color_map[v],
                 color.color,
                 v);
-              return -2;
+              return coloring_status_t::COLORING_FAILED;
             }
           }
         }
@@ -655,19 +657,19 @@ i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
     // printf("Number of active colors: %d\n", num_active_colors);
     if (num_active_colors != num_colors) {
       settings.log.printf("Folding: Number of active colors does not match number of colors\n");
-      return -2;
+      return coloring_status_t::COLORING_FAILED;
     }
     // printf("Number of active row colors: %d\n", num_active_row_colors);
     if (num_active_row_colors != num_row_colors) {
       settings.log.printf(
         "Folding: Number of active row colors does not match number of row colors\n");
-      return -2;
+      return coloring_status_t::COLORING_FAILED;
     }
     // printf("Number of active column colors: %d\n", num_active_col_colors);
     if (num_active_col_colors != num_col_colors) {
       settings.log.printf(
         "Folding: Number of active column colors does not match number of column colors\n");
-      return -2;
+      return coloring_status_t::COLORING_FAILED;
     }
 #endif
 
@@ -694,23 +696,23 @@ i_t color_graph(const csc_matrix_t<i_t, f_t>& A,
     }
     if (num_row_colors >= max_vertices) {
       settings.log.printf("Folding: Too many row colors %d max %d\n", num_row_colors, max_vertices);
-      return -2;
+      return coloring_status_t::COLORING_FAILED;
     }
     if (num_col_colors >= max_vertices) {
       settings.log.printf(
         "Folding: Too many column colors %d max %d\n", num_col_colors, max_vertices);
-      return -2;
+      return coloring_status_t::COLORING_FAILED;
     }
 
     if (num_row_colors > row_threshold || num_col_colors > col_threshold) {
       settings.log.printf("Folding: Number of colors exceeds threshold");
-      return -1;
+      return coloring_status_t::COLORING_FAILED;
     }
   }
   settings.log.printf(
     "Folding: Colors %d. Refinements: %d\n", num_row_colors + num_col_colors, num_refinements);
 
-  return 0;
+  return coloring_status_t::COLORING_SUCCESS;
 }
 
 template <typename i_t, typename f_t>
@@ -870,16 +872,17 @@ void folding(lp_problem_t<i_t, f_t>& problem,
   f_t fold_threshold   = settings.folding == -1 ? 0.50 : 1.0;
   i_t row_threshold    = static_cast<i_t>(fold_threshold * static_cast<f_t>(m));
   i_t col_threshold    = static_cast<i_t>(fold_threshold * static_cast<f_t>(n));
-  i_t status           = color_graph(augmented,
-                           settings,
-                           colors,
-                           row_threshold,
-                           col_threshold,
-                           num_row_colors,
-                           num_col_colors,
-                           num_colors,
-                           total_colors_seen);
-  if (status != 0) {
+
+  coloring_status_t status = color_graph(augmented,
+                                         settings,
+                                         colors,
+                                         row_threshold,
+                                         col_threshold,
+                                         num_row_colors,
+                                         num_col_colors,
+                                         num_colors,
+                                         total_colors_seen);
+  if (status != coloring_status_t::COLORING_SUCCESS) {
     settings.log.printf("Folding: Coloring aborted in %.2f seconds\n", toc(color_start_time));
     return;
   }
diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp
index de147132a3..5ee4f49d13 100644
--- a/cpp/src/dual_simplex/mip_node.hpp
+++ b/cpp/src/dual_simplex/mip_node.hpp
@@ -60,6 +60,7 @@ class mip_node_t {
       node_id(0),
       branch_var(-1),
       branch_dir(rounding_direction_t::NONE),
+      integer_infeasible(-1),
       objective_estimate(std::numeric_limits<f_t>::infinity()),
       vstatus(basis)
   {
@@ -73,6 +74,7 @@ class mip_node_t {
              i_t branch_variable,
              rounding_direction_t branch_direction,
              f_t branch_var_value,
+             i_t integer_inf,
              const std::vector<variable_status_t>& basis)
     : status(node_status_t::PENDING),
       lower_bound(parent_node->lower_bound),
@@ -82,9 +84,9 @@ class mip_node_t {
       branch_var(branch_variable),
       branch_dir(branch_direction),
       fractional_val(branch_var_value),
+      integer_infeasible(integer_inf),
       objective_estimate(parent_node->objective_estimate),
       vstatus(basis)
-
   {
     branch_var_lower = branch_direction == rounding_direction_t::DOWN ? problem.lower[branch_var]
                                                                       : std::ceil(branch_var_value);
@@ -250,6 +252,7 @@ class mip_node_t {
   f_t branch_var_lower;
   f_t branch_var_upper;
   f_t fractional_val;
+  i_t integer_infeasible;
 
   mip_node_t<i_t, f_t>* parent;
   std::unique_ptr<mip_node_t> children[2];
@@ -285,6 +288,7 @@ class search_tree_t {
   void branch(mip_node_t<i_t, f_t>* parent_node,
               const i_t branch_var,
               const f_t fractional_val,
+              const i_t integer_infeasible,
               const std::vector<variable_status_t>& parent_vstatus,
               const lp_problem_t<i_t, f_t>& original_lp,
               logger_t& log)
@@ -297,8 +301,8 @@ class search_tree_t {
                                                              branch_var,
                                                              rounding_direction_t::DOWN,
                                                              fractional_val,
+                                                             integer_infeasible,
                                                              parent_vstatus);
-
     graphviz_edge(log,
                   parent_node,
                   down_child.get(),
@@ -312,6 +316,7 @@ class search_tree_t {
                                                            branch_var,
                                                            rounding_direction_t::UP,
                                                            fractional_val,
+                                                           integer_infeasible,
                                                            parent_vstatus);
 
     graphviz_edge(log,
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index a54101ec88..c74841c1cf 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -1232,7 +1232,9 @@ i_t initialize_steepest_edge_norms(const lp_problem_t<i_t, f_t>& lp,
       settings.log.printf("Initialized %d of %d steepest edge norms in %.2fs\n", k, m, now);
     }
     if (toc(start_time) > settings.time_limit) { return -1; }
-    if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return -1; }
+    if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
+      return CONCURRENT_HALT_RETURN;
+    }
   }
   return 0;
 }
@@ -1733,6 +1735,69 @@ f_t dual_infeasibility(const lp_problem_t<i_t, f_t>& lp,
   return sum_infeasible;
 }
 
+template <typename i_t, typename f_t>
+f_t primal_infeasibility_breakdown(const lp_problem_t<i_t, f_t>& lp,
+                                   const simplex_solver_settings_t<i_t, f_t>& settings,
+                                   const std::vector<variable_status_t>& vstatus,
+                                   const std::vector<f_t>& x,
+                                   f_t& basic_infeas,     // out: violations of BASIC variables
+                                   f_t& nonbasic_infeas,  // out: violations of all non-BASIC variables
+                                   f_t& basic_over)       // out: BASIC violations > settings.primal_tol
+{  // Sums how far each x[j] lies outside [lp.lower[j], lp.upper[j]] and splits that total by status
+  const i_t n     = lp.num_cols;
+  f_t primal_inf  = 0;  // return value: sum of every bound violation, basic or not
+  basic_infeas    = 0.0;  // the three out-params are reset here, so callers need not pre-initialize
+  basic_over      = 0.0;
+  nonbasic_infeas = 0.0;
+  for (i_t j = 0; j < n; ++j) {
+    if (x[j] < lp.lower[j]) {
+      // Lower bound violated: x_j < l_j, so the violation l_j - x_j is strictly positive
+      const f_t infeas = -x[j] + lp.lower[j];
+      if (vstatus[j] == variable_status_t::BASIC) {
+        basic_infeas += infeas;
+        if (infeas > settings.primal_tol) { basic_over += infeas; }
+      } else {
+        nonbasic_infeas += infeas;
+      }
+      primal_inf += infeas;
+#ifdef PRIMAL_INFEASIBLE_DEBUG
+      if (infeas > settings.primal_tol) {
+        settings.log.printf("x %d infeas %e lo %e val %e up %e vstatus %d\n",
+                            j,
+                            infeas,
+                            lp.lower[j],
+                            x[j],
+                            lp.upper[j],
+                            static_cast<int>(vstatus[j]));
+      }
+#endif  // PRIMAL_INFEASIBLE_DEBUG
+    }
+    if (x[j] > lp.upper[j]) {
+      // Upper bound violated: x_j > u_j, so the violation x_j - u_j is strictly positive
+      const f_t infeas = x[j] - lp.upper[j];
+      if (vstatus[j] == variable_status_t::BASIC) {
+        basic_infeas += infeas;
+        if (infeas > settings.primal_tol) { basic_over += infeas; }
+      } else {
+        nonbasic_infeas += infeas;
+      }
+      primal_inf += infeas;
+#ifdef PRIMAL_INFEASIBLE_DEBUG
+      if (infeas > settings.primal_tol) {
+        settings.log.printf("x %d infeas %e lo %e val %e up %e vstatus %d\n",
+                            j,
+                            infeas,
+                            lp.lower[j],
+                            x[j],
+                            lp.upper[j],
+                            static_cast<int>(vstatus[j]));
+      }
+#endif  // PRIMAL_INFEASIBLE_DEBUG
+    }
+  }
+  return primal_inf;
+}
+
 template <typename i_t, typename f_t>
 f_t primal_infeasibility(const lp_problem_t<i_t, f_t>& lp,
                          const simplex_solver_settings_t<i_t, f_t>& settings,
@@ -1914,6 +1979,7 @@ void set_primal_variables_on_bounds(const lp_problem_t<i_t, f_t>& lp,
                                     std::vector<f_t>& x)
 {
   const i_t n = lp.num_cols;
+  f_t tol     = 1e-10;
   for (i_t j = 0; j < n; ++j) {
     // We set z_j = 0 for basic variables
     // But we explicitally skip setting basic variables here
@@ -1931,9 +1997,11 @@ void set_primal_variables_on_bounds(const lp_problem_t<i_t, f_t>& lp,
       }
       x[j]       = lp.lower[j];
       vstatus[j] = variable_status_t::NONBASIC_FIXED;
-    } else if (z[j] == 0 && lp.lower[j] > -inf && vstatus[j] == variable_status_t::NONBASIC_LOWER) {
+    } else if (z[j] >= -tol && lp.lower[j] > -inf &&
+               vstatus[j] == variable_status_t::NONBASIC_LOWER) {
       x[j] = lp.lower[j];
-    } else if (z[j] == 0 && lp.upper[j] < inf && vstatus[j] == variable_status_t::NONBASIC_UPPER) {
+    } else if (z[j] <= tol && lp.upper[j] < inf &&
+               vstatus[j] == variable_status_t::NONBASIC_UPPER) {
       x[j] = lp.upper[j];
     } else if (z[j] >= 0 && lp.lower[j] > -inf) {
       if (vstatus[j] != variable_status_t::NONBASIC_LOWER) {
@@ -2018,7 +2086,9 @@ f_t amount_of_perturbation(const lp_problem_t<i_t, f_t>& lp, const std::vector<f
 }
 
 template <typename i_t, typename f_t>
-void prepare_optimality(const lp_problem_t<i_t, f_t>& lp,
+void prepare_optimality(i_t info,
+                        f_t orig_primal_infeas,
+                        const lp_problem_t<i_t, f_t>& lp,
                         const simplex_solver_settings_t<i_t, f_t>& settings,
                         basis_update_mpf_t<i_t, f_t>& ft,
                         const std::vector<f_t>& objective,
@@ -2037,9 +2107,10 @@ void prepare_optimality(const lp_problem_t<i_t, f_t>& lp,
   const i_t m = lp.num_rows;
   const i_t n = lp.num_cols;
 
-  sol.objective      = compute_objective(lp, sol.x);
-  sol.user_objective = compute_user_objective(lp, sol.objective);
-  f_t perturbation   = phase2::amount_of_perturbation(lp, objective);
+  sol.objective         = compute_objective(lp, sol.x);
+  sol.user_objective    = compute_user_objective(lp, sol.objective);
+  f_t perturbation      = phase2::amount_of_perturbation(lp, objective);
+  f_t orig_perturbation = perturbation;
   if (perturbation > 1e-6 && phase == 2) {
     // Try to remove perturbation
     std::vector<f_t> unperturbed_y(m);
@@ -2079,6 +2150,27 @@ void prepare_optimality(const lp_problem_t<i_t, f_t>& lp,
       settings.log.printf("Perturbation:               %.2e\n", perturbation);
     }
   }
+
+#ifdef CHECK_PRIMAL_INFEASIBILITIES
+  if (primal_infeas > 10.0 * settings.primal_tol) {
+    f_t basic_infeas    = 0.0;
+    f_t nonbasic_infeas = 0.0;
+    f_t basic_over      = 0.0;
+    phase2::primal_infeasibility_breakdown(
+      lp, settings, vstatus, x, basic_infeas, nonbasic_infeas, basic_over);
+    settings.log.printf(
+      "Primal infeasibility %e/%e (Basic %e, Nonbasic %e, Basic over %e). Perturbation %e/%e. Info "
+      "%d\n",
+      primal_infeas,
+      orig_primal_infeas,
+      basic_infeas,
+      nonbasic_infeas,
+      basic_over,
+      orig_perturbation,
+      perturbation,
+      info);
+  }
+#endif
 }
 
 template <typename i_t, typename f_t>
@@ -2319,12 +2411,19 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
         basic_list, nonbasic_list, delta_y_steepest_edge);
     } else {
       std::fill(delta_y_steepest_edge.begin(), delta_y_steepest_edge.end(), -1);
-      if (phase2::initialize_steepest_edge_norms(
-            lp, settings, start_time, basic_list, ft, delta_y_steepest_edge) == -1) {
-        return dual::status_t::TIME_LIMIT;
-      }
+      f_t steepest_edge_start = tic();
+      i_t status              = phase2::initialize_steepest_edge_norms(
+        lp, settings, start_time, basic_list, ft, delta_y_steepest_edge);
+      f_t steepest_edge_time = toc(steepest_edge_start);
+      if (status == CONCURRENT_HALT_RETURN) { return dual::status_t::CONCURRENT_LIMIT; }
+      if (status == -1) { return dual::status_t::TIME_LIMIT; }
     }
   } else {
+    // Reset any nonpositive steepest edge norm of a basic variable to 1e-4 before reusing it
+    for (i_t k = 0; k < m; k++) {
+      const i_t j = basic_list[k];
+      if (delta_y_steepest_edge[j] <= 0.0) { delta_y_steepest_edge[j] = 1e-4; }
+    }
     settings.log.printf("using exisiting steepest edge %e\n",
                         vector_norm2<i_t, f_t>(delta_y_steepest_edge));
   }
@@ -2380,6 +2479,16 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
   i_t dense_delta_z  = 0;
   phase2::phase2_timers_t<i_t, f_t> timers(false);
 
+  if (phase == 2) {
+    settings.log.printf("%5d %+.16e %7d %.8e %.2e %.2f\n",
+                        0,
+                        compute_user_objective(lp, obj),
+                        infeasibility_indices.size(),
+                        primal_infeasibility_squared,
+                        0.0,
+                        toc(start_time));
+  }
+
   while (iter < iter_limit) {
     // Pricing
     i_t direction           = 0;
@@ -2405,7 +2514,67 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
     }
     timers.pricing_time += timers.stop_timer();
     if (leaving_index == -1) {
-      phase2::prepare_optimality(lp,
+#ifdef CHECK_BASIS_UPDATE
+      for (i_t k = 0; k < basic_list.size(); k++) {
+        const i_t jj = basic_list[k];
+        sparse_vector_t<i_t, f_t> ei_sparse(m, 1);
+        ei_sparse.i[0] = k;
+        ei_sparse.x[0] = 1.0;
+        sparse_vector_t<i_t, f_t> ubar_sparse(m, 0);
+        ft.b_transpose_solve(ei_sparse, ubar_sparse);
+        std::vector<f_t> ubar_dense(m);
+        ubar_sparse.to_dense(ubar_dense);
+        std::vector<f_t> BTu_dense(m);
+        b_transpose_multiply(lp, basic_list, ubar_dense, BTu_dense);
+        for (i_t l = 0; l < m; l++) {
+          if (l != k) {
+            settings.log.printf("BTu_dense[%d] = %e i %d\n", l, BTu_dense[l], k);
+          } else {
+            settings.log.printf("BTu_dense[%d] = %e != 1.0 i %d\n", l, BTu_dense[l], k);
+          }
+        }
+        for (i_t h = 0; h < m; h++) {
+          settings.log.printf("i %d ubar_dense[%d] = %.16e\n", k, h, ubar_dense[h]);
+        }
+      }
+      settings.log.printf("ft.num_updates() %d\n", ft.num_updates());
+      for (i_t h = 0; h < m; h++) {
+        settings.log.printf("basic_list[%d] = %d\n", h, basic_list[h]);
+      }
+
+#endif
+
+#ifdef CHECK_PRIMAL_INFEASIBILITIES
+      primal_infeasibility_squared =
+        phase2::compute_initial_primal_infeasibilities(lp,
+                                                       settings,
+                                                       basic_list,
+                                                       x,
+                                                       squared_infeasibilities,
+                                                       infeasibility_indices,
+                                                       primal_infeasibility);
+      if (primal_infeasibility > settings.primal_tol) {
+        const i_t nz = infeasibility_indices.size();
+        for (i_t k = 0; k < nz; ++k) {
+          const i_t j              = infeasibility_indices[k];
+          const f_t squared_infeas = squared_infeasibilities[j];
+          const f_t val            = squared_infeas / delta_y_steepest_edge[j];
+          if (squared_infeas >= 0.0 && delta_y_steepest_edge[j] < 0.0) {
+            settings.log.printf(
+              "Iter %d potential leaving %d val %e squared infeas %e delta_y_steepest_edge %e\n",
+              iter,
+              j,
+              val,
+              squared_infeas,
+              delta_y_steepest_edge[j]);
+          }
+        }
+      }
+#endif
+
+      phase2::prepare_optimality(0,
+                                 primal_infeasibility,
+                                 lp,
                                  settings,
                                  ft,
                                  objective,
@@ -2527,7 +2696,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
                                                  delta_z_indices,
                                                  nonbasic_mark);
       entering_index = bfrt.compute_step_length(step_length, nonbasic_entering_index);
-      if (entering_index == -4) {
+      if (entering_index == RATIO_TEST_NUMERICAL_ISSUES) {
         settings.log.printf("Numerical issues encountered in ratio test.\n");
         return dual::status_t::NUMERICAL;
       }
@@ -2536,9 +2705,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
       entering_index = phase2::phase2_ratio_test(
         lp, settings, vstatus, nonbasic_list, z, delta_z, step_length, nonbasic_entering_index);
     }
-    if (entering_index == -2) { return dual::status_t::TIME_LIMIT; }
-    if (entering_index == -3) { return dual::status_t::CONCURRENT_LIMIT; }
-    if (entering_index == -1) {
+    if (entering_index == RATIO_TEST_TIME_LIMIT) { return dual::status_t::TIME_LIMIT; }
+    if (entering_index == CONCURRENT_HALT_RETURN) { return dual::status_t::CONCURRENT_LIMIT; }
+    if (entering_index == RATIO_TEST_NO_ENTERING_VARIABLE) {
       settings.log.printf("No entering variable found. Iter %d\n", iter);
       settings.log.printf("Scaled infeasibility %e\n", max_val);
       f_t perturbation = phase2::amount_of_perturbation(lp, objective);
@@ -2577,7 +2746,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             // Need to reset the objective value, since we have recomputed x
             obj = phase2::compute_perturbed_objective(objective, x);
             if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) {
-              phase2::prepare_optimality(lp,
+              phase2::prepare_optimality(1,
+                                         primal_infeasibility,
+                                         lp,
                                          settings,
                                          ft,
                                          objective,
@@ -2620,7 +2791,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 
             if (primal_infeasibility <= settings.primal_tol &&
                 orig_dual_infeas <= settings.dual_tol) {
-              phase2::prepare_optimality(lp,
+              phase2::prepare_optimality(2,
+                                         primal_infeasibility,
+                                         lp,
                                          settings,
                                          ft,
                                          objective,
@@ -2849,7 +3022,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
                                                squared_infeasibilities,
                                                infeasibility_indices,
                                                entering_index,
-                                               primal_infeasibility);
+                                               primal_infeasibility_squared);
 
     phase2::clean_up_infeasibilities(squared_infeasibilities, infeasibility_indices);
 
diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp
index bbfe18d61d..9788968874 100644
--- a/cpp/src/dual_simplex/presolve.cpp
+++ b/cpp/src/dual_simplex/presolve.cpp
@@ -271,8 +271,6 @@ i_t convert_less_than_to_equal(const user_problem_t<i_t, f_t>& user_problem,
   // We must convert rows in the form: a_i^T x <= beta
   // into: a_i^T x + s_i = beta, s_i >= 0
 
-  csr_matrix_t<i_t, f_t> Arow(0, 0, 0);
-  problem.A.to_compressed_row(Arow);
   i_t num_cols = problem.num_cols + less_rows;
   i_t nnz      = problem.A.col_start[problem.num_cols] + less_rows;
   problem.A.col_start.resize(num_cols + 1);
@@ -446,7 +444,7 @@ i_t find_dependent_rows(lp_problem_t<i_t, f_t>& problem,
   std::vector<i_t> q(m);
 
   i_t pivots = right_looking_lu_row_permutation_only(C, settings, 1e-13, tic(), q, pinv);
-
+  if (pivots == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; }
   if (pivots < m) {
     settings.log.printf("Found %d dependent rows\n", m - pivots);
     const i_t num_dependent = m - pivots;
@@ -629,8 +627,8 @@ void convert_user_problem(const user_problem_t<i_t, f_t>& user_problem,
 
     // Empty var_types means that all variables are continuous
     bounds_strengthening_t<i_t, f_t> strengthening(problem, Arow, row_sense, {});
-    std::fill(strengthening.bounds_changed.begin(), strengthening.bounds_changed.end(), true);
-    strengthening.bounds_strengthening(problem.lower, problem.upper, settings);
+    std::vector<bool> bounds_changed(problem.num_cols, true);
+    strengthening.bounds_strengthening(settings, bounds_changed, problem.lower, problem.upper);
   }
 
   settings.log.debug(
@@ -1101,6 +1099,7 @@ i_t presolve(const lp_problem_t<i_t, f_t>& original,
     i_t infeasible;
     f_t dependent_row_start    = tic();
     const i_t independent_rows = find_dependent_rows(problem, settings, dependent_rows, infeasible);
+    if (independent_rows == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; }
     if (infeasible != kOk) {
       settings.log.printf("Found problem infeasible in presolve\n");
       return -1;
diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp
index 69f15ba188..38cddc0e24 100644
--- a/cpp/src/dual_simplex/primal.cpp
+++ b/cpp/src/dual_simplex/primal.cpp
@@ -296,7 +296,9 @@ primal::status_t primal_phase2(i_t phase,
   std::vector<i_t> slacks_needed;
   i_t rank =
     factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
-  if (rank != m) {
+  if (rank == CONCURRENT_HALT_RETURN) {
+    return primal::status_t::CONCURRENT_LIMIT;
+  } else if (rank != m) {
     settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
     basis_repair(lp.A,
                  settings,
@@ -307,8 +309,10 @@ primal::status_t primal_phase2(i_t phase,
                  basic_list,
                  nonbasic_list,
                  vstatus);
-    if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) ==
-        -1) {
+    rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
+    if (rank == CONCURRENT_HALT_RETURN) {
+      return primal::status_t::CONCURRENT_LIMIT;
+    } else if (rank == -1) {
       settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
       return primal::status_t::NUMERICAL;
     } else {
diff --git a/cpp/src/dual_simplex/primal.hpp b/cpp/src/dual_simplex/primal.hpp
index a5d356fdbc..bd72fee4b3 100644
--- a/cpp/src/dual_simplex/primal.hpp
+++ b/cpp/src/dual_simplex/primal.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -23,7 +23,9 @@ enum class status_t {
   PRIMAL_UNBOUNDED = 1,
   NUMERICAL        = 2,
   NOT_LOADED       = 3,
-  ITERATION_LIMIT  = 4
+  TIME_LIMIT       = 4,
+  ITERATION_LIMIT  = 5,
+  CONCURRENT_LIMIT = 6
 };
 }
 
diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp
index f56f3be4be..015034b775 100644
--- a/cpp/src/dual_simplex/pseudo_costs.cpp
+++ b/cpp/src/dual_simplex/pseudo_costs.cpp
@@ -210,9 +210,6 @@ static cuopt::mps_parser::mps_data_model_t<i_t, f_t> simplex_problem_to_mps_data
 
   mps_model.set_constraint_lower_bounds(constraint_lower.data(), m);
   mps_model.set_constraint_upper_bounds(constraint_upper.data(), m);
-
-  // TODO verify
-  // Set maximize flag (obj_scale: 1.0 for min, -1.0 for max)
   mps_model.set_maximize(user_problem.obj_scale < 0);
 
   return mps_model;
@@ -236,14 +233,10 @@ void strong_branching(const user_problem_t<i_t, f_t>& original_problem,
   pc.strong_branch_up.assign(fractional.size(), 0);
   pc.num_strong_branches_completed = 0;
 
-  settings.log.printf("Strong branching using %d threads and %ld fractional variables\n",
-                      settings.num_threads,
-                      fractional.size());
-
   if (settings.mip_batch_pdlp_strong_branching) {
     settings.log.printf("Batch PDLP strong branching enabled\n");
 
-    std::chrono::steady_clock::time_point start_batch = std::chrono::steady_clock::now();
+    f_t start_batch = tic();
 
     // Use original_problem to create the BatchLP problem
     csr_matrix_t<i_t, f_t> A_row(original_problem.A.m, original_problem.A.n, 0);
@@ -263,8 +256,7 @@ void strong_branching(const user_problem_t<i_t, f_t>& original_problem,
     const auto mps_model = simplex_problem_to_mps_data_model(original_problem);
     const auto solutions =
       batch_pdlp_solve(original_problem.handle_ptr, mps_model, fractional, fraction_values);
-    std::chrono::steady_clock::time_point end_batch = std::chrono::steady_clock::now();
-    std::chrono::duration<f_t> duration             = end_batch - start_batch;
+    f_t batch_pdlp_strong_branching_time = toc(start_batch);
 
     // Find max iteration on how many are done accross the batch
     i_t max_iterations = 0;
@@ -279,8 +271,8 @@ void strong_branching(const user_problem_t<i_t, f_t>& original_problem,
     }
 
     settings.log.printf(
-      "Batch PDLP strong branching took %.2f seconds. Solved %d/%d with max %d iterations\n",
-      duration.count(),
+      "Batch PDLP strong branching completed in %.2fs. Solved %d/%d with max %d iterations\n",
+      batch_pdlp_strong_branching_time,
       amount_done,
       fractional.size() * 2,
       max_iterations);
@@ -312,7 +304,10 @@ void strong_branching(const user_problem_t<i_t, f_t>& original_problem,
       pc.strong_branch_up[k]   = obj_up - root_obj;
     }
   } else {
-    std::chrono::steady_clock::time_point start_timea = std::chrono::steady_clock::now();
+    settings.log.printf("Strong branching using %d threads and %ld fractional variables\n",
+                        settings.num_threads,
+                        fractional.size());
+    f_t strong_branching_start_time = tic();
 
 #pragma omp parallel num_threads(settings.num_threads)
     {
@@ -349,9 +344,7 @@ void strong_branching(const user_problem_t<i_t, f_t>& original_problem,
                              pc);
       }
     }
-    std::chrono::steady_clock::time_point end_timea = std::chrono::steady_clock::now();
-    std::chrono::duration<f_t> duration             = end_timea - start_timea;
-    settings.log.printf("Dual Simplex Strong branching took %.2f seconds\n", duration.count());
+    settings.log.printf("Strong branching completed in %.2fs\n", toc(strong_branching_start_time));
   }
 
   pc.update_pseudo_costs_from_strong_branching(fractional, root_soln);
diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp
index a63c1181fa..82ea7c0e10 100644
--- a/cpp/src/dual_simplex/right_looking_lu.cpp
+++ b/cpp/src/dual_simplex/right_looking_lu.cpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -1115,7 +1115,7 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t<i_t, f_t>& A,
 
     if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
       settings.log.printf("Concurrent halt\n");
-      return -2;
+      return CONCURRENT_HALT_RETURN;
     }
   }
 
diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp
index cbe0cbba5e..f9911ee53a 100644
--- a/cpp/src/dual_simplex/simplex_solver_settings.hpp
+++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp
@@ -87,9 +87,19 @@ struct simplex_solver_settings_t {
       iteration_log_frequency(1000),
       first_iteration_log(2),
       num_threads(omp_get_max_threads() - 1),
+      max_cut_passes(0),
+      mir_cuts(-1),
+      mixed_integer_gomory_cuts(-1),
+      knapsack_cuts(-1),
+      strong_chvatal_gomory_cuts(-1),
+      reduced_cost_strengthening(-1),
+      cut_change_threshold(1e-3),
+      cut_min_orthogonality(0.5),
       num_bfs_workers(std::max(num_threads / 4, 1)),
       random_seed(0),
       inside_mip(0),
+      sub_mip(0),
+      reliability_branching(-1),
       solution_callback(nullptr),
       heuristic_preemption_callback(nullptr),
       concurrent_halt(nullptr)
@@ -154,6 +164,17 @@ struct simplex_solver_settings_t {
   i_t first_iteration_log;         // number of iterations to log at beginning of solve
   i_t num_threads;                 // number of threads to use
   i_t random_seed;                 // random seed
+  i_t max_cut_passes;              // number of cut passes to make
+  i_t mir_cuts;                    // -1 automatic, 0 to disable, >0 to enable MIR cuts
+  i_t mixed_integer_gomory_cuts;   // -1 automatic, 0 to disable, >0 to enable mixed integer Gomory
+                                   // cuts
+  i_t knapsack_cuts;               // -1 automatic, 0 to disable, >0 to enable knapsack cuts
+  i_t strong_chvatal_gomory_cuts;  // -1 automatic, 0 to disable, >0 to enable strong Chvatal Gomory
+                                   // cuts
+  i_t reduced_cost_strengthening;  // -1 automatic, 0 to disable, >0 to enable reduced cost
+                                   // strengthening
+  f_t cut_change_threshold;        // threshold for cut change
+  f_t cut_min_orthogonality;       // minimum orthogonality for cuts
   i_t num_bfs_workers;             // number of threads dedicated to the best-first search
   i_t mip_batch_pdlp_strong_branching{0};  // 0 if not using batch PDLP for strong branching, 1 if
                                            // using batch PDLP for strong branching
@@ -161,6 +182,8 @@ struct simplex_solver_settings_t {
   diving_heuristics_settings_t<i_t, f_t> diving_settings;  // Settings for the diving heuristics
 
   i_t inside_mip;  // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node
+  i_t sub_mip;     // 0 if in regular MIP solve, 1 if in sub-MIP solve
+  i_t reliability_branching;  // -1 automatic, 0 to disable, >0 to enable reliability branching
   std::function<void(std::vector<f_t>&, f_t)> solution_callback;
   std::function<void(const std::vector<f_t>&, f_t)> node_processed_callback;
   std::function<void()> heuristic_preemption_callback;
diff --git a/cpp/src/dual_simplex/solution.hpp b/cpp/src/dual_simplex/solution.hpp
index d1d745cbd9..a678e2fd70 100644
--- a/cpp/src/dual_simplex/solution.hpp
+++ b/cpp/src/dual_simplex/solution.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -39,7 +39,7 @@ class lp_solution_t {
   std::vector<f_t> x;
   // Dual solution vector. Lagrange multipliers for equality constraints.
   std::vector<f_t> y;
-  // Dual solution vector. Lagrange multipliers for inequality constraints.
+  // Reduced costs
   std::vector<f_t> z;
   f_t objective;
   f_t user_objective;
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 1f31a757d4..37297d9beb 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -8,6 +8,7 @@
 #include <dual_simplex/solve.hpp>
 
 #include <dual_simplex/barrier.hpp>
+#include <dual_simplex/basis_solves.hpp>
 #include <dual_simplex/branch_and_bound.hpp>
 #include <dual_simplex/crossover.hpp>
 #include <dual_simplex/initial_basis.hpp>
@@ -140,6 +141,7 @@ lp_status_t solve_linear_program_with_advanced_basis(
   lp_problem_t<i_t, f_t> presolved_lp(original_lp.handle_ptr, 1, 1, 1);
   presolve_info_t<i_t, f_t> presolve_info;
   const i_t ok = presolve(original_lp, settings, presolved_lp, presolve_info);
+  if (ok == CONCURRENT_HALT_RETURN) { return lp_status_t::CONCURRENT_LIMIT; }
   if (ok == -1) { return lp_status_t::INFEASIBLE; }
 
   constexpr bool write_out_matlab = false;
@@ -314,6 +316,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
   presolve_info_t<i_t, f_t> presolve_info;
   lp_problem_t<i_t, f_t> presolved_lp(user_problem.handle_ptr, 1, 1, 1);
   const i_t ok = presolve(original_lp, barrier_settings, presolved_lp, presolve_info);
+  if (ok == CONCURRENT_HALT_RETURN) { return lp_status_t::CONCURRENT_LIMIT; }
   if (ok == -1) { return lp_status_t::INFEASIBLE; }
 
   // Apply columns scaling to the presolve LP
@@ -457,18 +460,59 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
       settings.log.printf("Primal objective: %e\n",
                           dot<i_t, f_t>(dualize_info.primal_problem.objective, primal_solution.x));
 
-      std::vector<f_t> primal_residual = dualize_info.primal_problem.rhs;
-      matrix_vector_multiply(
-        dualize_info.primal_problem.A, 1.0, primal_solution.x, -1.0, primal_residual);
       std::vector<i_t> inequality_rows(dualize_info.primal_problem.num_rows, 1);
       for (i_t i : dualize_info.equality_rows) {
         inequality_rows[i] = 0;
       }
+      i_t less_rows = 0;
       for (i_t i = 0; i < dualize_info.primal_problem.num_rows; ++i) {
-        if (inequality_rows[i] == 1) {
-          primal_residual[i] = std::max(primal_residual[i], 0.0);  // a_i^T x - b_i <= 0
+        if (inequality_rows[i] == 1) { less_rows++; }
+      }
+      // Add slack variables to the primal problem
+      if (less_rows > 0) {
+        std::vector<f_t> slack_info = dualize_info.primal_problem.rhs;
+        matrix_vector_multiply(
+          dualize_info.primal_problem.A, -1.0, primal_solution.x, 1.0, slack_info);
+
+        lp_problem_t<i_t, f_t>& problem = dualize_info.primal_problem;
+        i_t num_cols                    = problem.num_cols + less_rows;
+        i_t nnz                         = problem.A.col_start[problem.num_cols] + less_rows;
+        problem.A.col_start.resize(num_cols + 1);
+        problem.A.i.resize(nnz);
+        problem.A.x.resize(nnz);
+        problem.lower.resize(num_cols);
+        problem.upper.resize(num_cols);
+        problem.objective.resize(num_cols);
+        primal_solution.x.resize(num_cols);
+        primal_solution.z.resize(num_cols);
+
+        i_t p = problem.A.col_start[problem.num_cols];
+        i_t j = problem.num_cols;
+        for (i_t i = 0; i < problem.num_rows; i++) {
+          if (inequality_rows[i] == 1) {
+            problem.lower[j]         = 0.0;
+            problem.upper[j]         = INFINITY;
+            problem.objective[j]     = 0.0;
+            problem.A.i[p]           = i;
+            problem.A.x[p]           = 1.0;
+            primal_solution.x[j]     = slack_info[i];
+            primal_solution.z[j]     = -primal_solution.y[i];
+            problem.A.col_start[j++] = p++;
+            inequality_rows[i]       = 0;
+            less_rows--;
+          }
         }
+        problem.A.col_start[num_cols] = p;
+        assert(less_rows == 0);
+        assert(p == nnz);
+        problem.A.n      = num_cols;
+        problem.num_cols = num_cols;
       }
+
+      std::vector<f_t> primal_residual = dualize_info.primal_problem.rhs;
+      matrix_vector_multiply(
+        dualize_info.primal_problem.A, 1.0, primal_solution.x, -1.0, primal_residual);
+
       f_t primal_residual_norm     = vector_norm_inf<i_t, f_t>(primal_residual);
       const f_t norm_b             = vector_norm_inf<i_t, f_t>(dualize_info.primal_problem.rhs);
       f_t primal_relative_residual = primal_residual_norm / (1.0 + norm_b);
@@ -505,6 +549,13 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
   if (!settings.crossover || barrier_lp.Q.n > 0) { return barrier_status; }
 
   if (settings.crossover && barrier_status == lp_status_t::OPTIMAL) {
+    {
+      std::vector<f_t> rhs = original_lp.rhs;
+      matrix_vector_multiply(original_lp.A, 1.0, lp_solution.x, -1.0, rhs);
+      f_t primal_residual = vector_norm_inf<i_t, f_t>(rhs);
+      settings.log.printf("Primal residual before adding artificial variables: %e\n",
+                          primal_residual);
+    }
     // Check to see if we need to add artifical variables
     std::vector<i_t> artificial_variables;
     artificial_variables.reserve(original_lp.num_rows);
@@ -550,6 +601,12 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& us
              lp_solution.x.size(),
              lp_solution.z.size());
 #endif
+
+      std::vector<f_t> rhs = original_lp.rhs;
+      matrix_vector_multiply(original_lp.A, 1.0, lp_solution.x, -1.0, rhs);
+      f_t primal_residual = vector_norm_inf<i_t, f_t>(rhs);
+      settings.log.printf("Primal residual after adding artificial variables: %e\n",
+                          primal_residual);
     }
 
     // Run crossover
@@ -597,7 +654,7 @@ i_t solve(const user_problem_t<i_t, f_t>& problem,
 {
   i_t status;
   if (is_mip(problem) && !settings.relaxation) {
-    branch_and_bound_t branch_and_bound(problem, settings);
+    branch_and_bound_t branch_and_bound(problem, settings, tic());
     mip_solution_t<i_t, f_t> mip_solution(problem.num_cols);
     mip_status_t mip_status = branch_and_bound.solve(mip_solution);
     if (mip_status == mip_status_t::OPTIMAL) {
@@ -636,7 +693,7 @@ i_t solve_mip_with_guess(const user_problem_t<i_t, f_t>& problem,
 {
   i_t status;
   if (is_mip(problem)) {
-    branch_and_bound_t branch_and_bound(problem, settings);
+    branch_and_bound_t branch_and_bound(problem, settings, tic());
     branch_and_bound.set_initial_guess(guess);
     mip_status_t mip_status = branch_and_bound.solve(solution);
     if (mip_status == mip_status_t::OPTIMAL) {
diff --git a/cpp/src/dual_simplex/sparse_cholesky.cuh b/cpp/src/dual_simplex/sparse_cholesky.cuh
index eea7ee8f32..e2b351afaa 100644
--- a/cpp/src/dual_simplex/sparse_cholesky.cuh
+++ b/cpp/src/dual_simplex/sparse_cholesky.cuh
@@ -67,7 +67,7 @@ class sparse_cholesky_base_t {
         "details: " #msg "\n",                                       \
         status);                                                     \
       CUDSS_EXAMPLE_FREE;                                            \
-      return -2;                                                     \
+      return -1;                                                     \
     }                                                                \
   } while (0);
 
@@ -444,7 +444,9 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
       raft::common::nvtx::range fun_scope("Barrier: cuDSS Analyze : CUDSS_PHASE_ANALYSIS");
       status =
         cudssExecute(handle, CUDSS_PHASE_REORDERING, solverConfig, solverData, A, cudss_x, cudss_b);
-      if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
+      if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
+        return CONCURRENT_HALT_RETURN;
+      }
       if (status != CUDSS_STATUS_SUCCESS) {
         settings_.log.printf(
           "FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for "
@@ -458,7 +460,9 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
 
       status = cudssExecute(
         handle, CUDSS_PHASE_SYMBOLIC_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b);
-      if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
+      if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
+        return CONCURRENT_HALT_RETURN;
+      }
       if (status != CUDSS_STATUS_SUCCESS) {
         settings_.log.printf(
           "FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for "
@@ -514,7 +518,9 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
     f_t start_numeric = tic();
     status            = cudssExecute(
       handle, CUDSS_PHASE_FACTORIZATION, solverConfig, solverData, A, cudss_x, cudss_b);
-    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
+    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
+      return CONCURRENT_HALT_RETURN;
+    }
     if (status != CUDSS_STATUS_SUCCESS) {
       settings_.log.printf(
         "FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for "
@@ -528,7 +534,9 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
 #endif
 
     f_t numeric_time = toc(start_numeric);
-    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
+    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
+      return CONCURRENT_HALT_RETURN;
+    }
 
     int info;
     size_t sizeWritten = 0;
@@ -626,7 +634,9 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
     A_created = true;
 
     // Perform symbolic analysis
-    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
+    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
+      return CONCURRENT_HALT_RETURN;
+    }
     f_t start_analysis = tic();
     CUDSS_CALL_AND_CHECK(
       cudssExecute(handle, CUDSS_PHASE_REORDERING, solverConfig, solverData, A, cudss_x, cudss_b),
@@ -634,7 +644,9 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
       "cudssExecute for reordering");
 
     f_t reorder_time = toc(start_analysis);
-    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
+    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
+      return CONCURRENT_HALT_RETURN;
+    }
 
     f_t start_symbolic = tic();
 
@@ -650,7 +662,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
     if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
       RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
       handle_ptr_->get_stream().synchronize();
-      return -2;
+      return CONCURRENT_HALT_RETURN;
     }
     int64_t lu_nz       = 0;
     size_t size_written = 0;
@@ -698,7 +710,9 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
       "cudssExecute for factorization");
 
     f_t numeric_time = toc(start_numeric);
-    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
+    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
+      return CONCURRENT_HALT_RETURN;
+    }
 
     int info;
     size_t sizeWritten = 0;
@@ -761,7 +775,9 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
       cudssMatrixSetValues(cudss_x, x.data()), status, "cudssMatrixSetValues for x");
 
     status = cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, solverData, A, cudss_x, cudss_b);
-    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
+    if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
+      return CONCURRENT_HALT_RETURN;
+    }
     if (status != CUDSS_STATUS_SUCCESS) {
       settings_.log.printf(
         "FAILED: CUDSS call ended unsuccessfully with status = %d, details: cuDSSExecute for "
diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp
index 7edc7b1eb5..1d8f12a3a5 100644
--- a/cpp/src/dual_simplex/sparse_matrix.cpp
+++ b/cpp/src/dual_simplex/sparse_matrix.cpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -363,6 +363,75 @@ i_t csc_matrix_t<i_t, f_t>::remove_row(i_t row)
   return 0;
 }
 
+// Stack the rows of C underneath the rows already stored in this CSR matrix.
+// C may have fewer columns than this matrix, but not more.
+// Returns 0 on success and -1, without modifying the matrix, if C.n > n.
+template <typename i_t, typename f_t>
+i_t csr_matrix_t<i_t, f_t>::append_rows(const csr_matrix_t<i_t, f_t>& C)
+{
+  const i_t rows_before = this->m;
+  const i_t nnz_before  = this->row_start[rows_before];
+  const i_t rows_added  = C.m;
+  // Reject C before any storage is resized when it is wider than this matrix.
+  if (C.n > this->n) {
+    printf("append_rows error: C.n %d n %d\n", C.n, this->n);
+    return -1;
+  }
+  const i_t nnz_added  = C.row_start[rows_added];
+  const i_t nnz_total  = nnz_before + nnz_added;
+  const i_t rows_total = rows_before + rows_added;
+
+  this->j.resize(nnz_total);
+  this->x.resize(nnz_total);
+  this->row_start.resize(rows_total + 1);
+
+  // Row pointers of the new rows: keep a running offset that starts at the old
+  // end of storage and grows by the entry count of each row of C.
+  i_t offset = nnz_before;
+  for (i_t k = 0; k < rows_added; ++k) {
+    this->row_start[rows_before + k] = offset;
+    offset += C.row_start[k + 1] - C.row_start[k];
+  }
+  this->row_start[rows_total] = offset;
+
+  // Column indices and values of C are copied right behind the old entries.
+  for (i_t q = 0; q < nnz_added; ++q) {
+    this->j[nnz_before + q] = C.j[q];
+    this->x[nnz_before + q] = C.x[q];
+  }
+
+  this->m      = rows_total;
+  this->nz_max = nnz_total;
+  return 0;
+}
+
+// Append the sparse vector c as one additional row at the bottom of this CSR
+// matrix. Entries are stored in the order they appear in c. Always returns 0.
+template <typename i_t, typename f_t>
+i_t csr_matrix_t<i_t, f_t>::append_row(const sparse_vector_t<i_t, f_t>& c)
+{
+  const i_t last_row   = this->m;
+  const i_t row_begin  = this->row_start[last_row];
+  const i_t row_length = c.i.size();
+  const i_t row_end    = row_begin + row_length;
+
+  this->j.resize(row_end);
+  this->x.resize(row_end);
+  this->row_start.resize(last_row + 2);
+  this->row_start[last_row + 1] = row_end;
+
+  // The new row occupies [row_begin, row_end) of the index and value arrays.
+  for (i_t k = 0; k < row_length; ++k) {
+    this->j[row_begin + k] = c.i[k];
+    this->x[row_begin + k] = c.x[k];
+  }
+
+  this->m      = last_row + 1;
+  this->nz_max = row_end;
+  return 0;
+}
+
 template <typename i_t, typename f_t>
 void csc_matrix_t<i_t, f_t>::print_matrix(FILE* fid) const
 {
@@ -505,6 +574,10 @@ i_t csc_matrix_t<i_t, f_t>::check_matrix(std::string matrix_name) const
 #ifdef CHECK_MATRIX
   std::vector<i_t> row_marker(this->m, -1);
   for (i_t j = 0; j < this->n; ++j) {
+    if (static_cast<size_t>(j) + 1 >= this->col_start.size()) {  // col_start[j + 1] is read below
+      printf("Col start too small size %zu n %d\n", this->col_start.size(), this->n);
+      return -1;
+    }
     const i_t col_start = this->col_start[j];
     const i_t col_end   = this->col_start[j + 1];
     if (col_start > col_end || col_start > this->col_start[this->n]) {
@@ -559,7 +632,7 @@ size_t csc_matrix_t<i_t, f_t>::hash() const
 }
 
 template <typename i_t, typename f_t>
-void csr_matrix_t<i_t, f_t>::check_matrix(std::string matrix_name) const
+i_t csr_matrix_t<i_t, f_t>::check_matrix(std::string matrix_name) const
 {
   std::vector<i_t> col_marker(this->n, -1);
   for (i_t i = 0; i < this->m; ++i) {
@@ -567,12 +640,18 @@ void csr_matrix_t<i_t, f_t>::check_matrix(std::string matrix_name) const
     const i_t row_end   = this->row_start[i + 1];
     for (i_t p = row_start; p < row_end; ++p) {
       const i_t j = this->j[p];
+      if (j < 0 || j >= this->n) {
+        printf("CSR Error: column index %d not in range [0, %d)\n", j, this->n);
+        return -1;
+      }
       if (col_marker[j] == i) {
         printf("CSR Error (%s) : repeated column index %d in row %d\n", matrix_name.c_str(), j, i);
+        return -1;
       }
       col_marker[j] = i;
     }
   }
+  return 0;
 }
 
 // x <- x + alpha * A(:, j)
diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp
index 9ae8ea80be..0b6c0b11d6 100644
--- a/cpp/src/dual_simplex/sparse_matrix.hpp
+++ b/cpp/src/dual_simplex/sparse_matrix.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -151,8 +151,14 @@ class csr_matrix_t {
   // Create a new matrix with the marked rows removed
   i_t remove_rows(std::vector<i_t>& row_marker, csr_matrix_t<i_t, f_t>& Aout) const;
 
+  // Append rows from another CSR matrix
+  i_t append_rows(const csr_matrix_t<i_t, f_t>& C);
+
+  // Append a row from a sparse vector
+  i_t append_row(const sparse_vector_t<i_t, f_t>& c);
+
   // Ensures no repeated column indices within a row
-  void check_matrix(std::string matrix_name = "") const;
+  i_t check_matrix(std::string matrix_name = "") const;
 
   bool is_diagonal() const
   {
diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp
index 2d47456505..4e2ecfa196 100644
--- a/cpp/src/dual_simplex/sparse_vector.cpp
+++ b/cpp/src/dual_simplex/sparse_vector.cpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -28,6 +28,21 @@ sparse_vector_t<i_t, f_t>::sparse_vector_t(const csc_matrix_t<i_t, f_t>& A, i_t
   }
 }
 
+// Build a sparse vector holding the stored entries of row `row` of the CSR matrix A.
+template <typename i_t, typename f_t>
+sparse_vector_t<i_t, f_t>::sparse_vector_t(const csr_matrix_t<i_t, f_t>& A, i_t row)
+{
+  n               = A.n;                   // dimension = number of columns of A
+  const i_t first = A.row_start[row];      // the row occupies [first, last) of A.j / A.x
+  const i_t last  = A.row_start[row + 1];
+  i.reserve(last - first);
+  x.reserve(last - first);
+  for (i_t p = first; p < last; ++p) {
+    i.push_back(A.j[p]);
+    x.push_back(A.x[p]);
+  }
+}
+
 template <typename i_t, typename f_t>
 void sparse_vector_t<i_t, f_t>::from_dense(const std::vector<f_t>& in)
 {
@@ -106,6 +121,17 @@ void sparse_vector_t<i_t, f_t>::inverse_permute_vector(const std::vector<i_t>& p
   y.i = i_perm;
 }
 
+// Dot product of this sparse vector with the dense vector x_dense.
+template <typename i_t, typename f_t>
+f_t sparse_vector_t<i_t, f_t>::dot(const std::vector<f_t>& x_dense) const
+{
+  f_t sum = 0.0;
+  for (i_t k = 0; k < static_cast<i_t>(i.size()); ++k) {
+    sum += x[k] * x_dense[i[k]];  // i[k] is the dense position of stored value x[k]
+  }
+  return sum;
+}
+
 template <typename i_t, typename f_t>
 f_t sparse_vector_t<i_t, f_t>::sparse_dot(const csc_matrix_t<i_t, f_t>& Y, i_t y_col) const
 {
@@ -207,6 +233,28 @@ f_t sparse_vector_t<i_t, f_t>::find_coefficient(i_t index) const
   return std::numeric_limits<f_t>::quiet_NaN();
 }
 
+template <typename i_t, typename f_t>
+void sparse_vector_t<i_t, f_t>::squeeze(sparse_vector_t<i_t, f_t>& y) const
+{
+  // Copy the entries with nonzero value into y; staged in locals so that y may alias *this.
+  std::vector<i_t> kept_i;
+  std::vector<f_t> kept_x;
+  i_t nz = 0;  // number of stored entries whose value is not zero
+  for (const f_t value : x) {
+    if (value != 0.0) { nz++; }
+  }
+  kept_i.reserve(nz);  // exact count is known, so each buffer is allocated once
+  kept_x.reserve(nz);
+  for (i_t k = 0; k < static_cast<i_t>(x.size()); k++) {
+    if (x[k] == 0.0) { continue; }  // stored zeros are dropped
+    kept_i.push_back(i[k]);
+    kept_x.push_back(x[k]);
+  }
+  y.n = n;
+  y.i.swap(kept_i);  // the input is only replaced after it has been fully read
+  y.x.swap(kept_x);
+}
+
 #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
 template class sparse_vector_t<int, double>;
 #endif
diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp
index 7acfdc8b5e..95e9afa29e 100644
--- a/cpp/src/dual_simplex/sparse_vector.hpp
+++ b/cpp/src/dual_simplex/sparse_vector.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -25,6 +25,8 @@ class sparse_vector_t {
   sparse_vector_t(const std::vector<f_t>& in) { from_dense(in); }
   // Construct a sparse vector from a column of a CSC matrix
   sparse_vector_t(const csc_matrix_t<i_t, f_t>& A, i_t col);
+  // Construct a sparse vector from a row of a CSR matrix
+  sparse_vector_t(const csr_matrix_t<i_t, f_t>& A, i_t row);
   // gather a dense vector into a sparse vector
   void from_dense(const std::vector<f_t>& in);
   // convert a sparse vector into a CSC matrix with a single column
@@ -38,6 +40,8 @@ class sparse_vector_t {
   void inverse_permute_vector(const std::vector<i_t>& p);
   // inverse permute a sparse vector into another sparse vector
   void inverse_permute_vector(const std::vector<i_t>& p, sparse_vector_t<i_t, f_t>& y) const;
+  // compute the dot product of a sparse vector with a dense vector
+  f_t dot(const std::vector<f_t>& x) const;
   // compute the dot product of a sparse vector with a column of a CSC matrix
   f_t sparse_dot(const csc_matrix_t<i_t, f_t>& Y, i_t y_col) const;
   // ensure the coefficients in the sparse vectory are sorted in terms of increasing index
@@ -47,6 +51,8 @@ class sparse_vector_t {
   void negate();
   f_t find_coefficient(i_t index) const;
 
+  void squeeze(sparse_vector_t<i_t, f_t>& y) const;
+
   i_t n;
   std::vector<i_t> i;
   std::vector<f_t> x;
diff --git a/cpp/src/dual_simplex/types.hpp b/cpp/src/dual_simplex/types.hpp
index a3e15190e2..9de33ed3b3 100644
--- a/cpp/src/dual_simplex/types.hpp
+++ b/cpp/src/dual_simplex/types.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -19,4 +19,7 @@ using float64_t = double;
 
 constexpr float64_t inf = std::numeric_limits<float64_t>::infinity();
 
+// Returned to signal that a concurrent halt has occurred (parenthesized: expands as one operand)
+#define CONCURRENT_HALT_RETURN (-2)
+
 }  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu
index 493e730fb9..9dc6ac9c5e 100644
--- a/cpp/src/math_optimization/solver_settings.cu
+++ b/cpp/src/math_optimization/solver_settings.cu
@@ -72,7 +72,9 @@ solver_settings_t<i_t, f_t>::solver_settings_t() : pdlp_settings(), mip_settings
     {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, 0.0, CUOPT_INFINITY, 1e-10},
     {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, 0.0, 1e-1, 1e-4},
     {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, 0.0, 1e-1, 1e-10},
-    {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, 0.0, 1e-1, 1e-10}
+    {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, 0.0, 1e-1, 1e-10},
+    {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, 0.0, std::numeric_limits<f_t>::infinity(), 1e-3},
+    {CUOPT_MIP_CUT_MIN_ORTHOGONALITY, &mip_settings.cut_min_orthogonality, 0.0, 1.0, 0.5}
    };
 
   // Int parameters
@@ -87,6 +89,12 @@ solver_settings_t<i_t, f_t>::solver_settings_t() : pdlp_settings(), mip_settings
     {CUOPT_DUALIZE, &pdlp_settings.dualize, -1, 1, -1},
     {CUOPT_ORDERING, &pdlp_settings.ordering, -1, 1, -1},
     {CUOPT_BARRIER_DUAL_INITIAL_POINT, &pdlp_settings.barrier_dual_initial_point, -1, 1, -1},
+    {CUOPT_MIP_CUT_PASSES, &mip_settings.max_cut_passes, -1, std::numeric_limits<i_t>::max(), 10},
+    {CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS, &mip_settings.mir_cuts, -1, 1, -1},
+    {CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS, &mip_settings.mixed_integer_gomory_cuts, -1, 1, -1},
+    {CUOPT_MIP_KNAPSACK_CUTS, &mip_settings.knapsack_cuts, -1, 1, -1},
+    {CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS, &mip_settings.strong_chvatal_gomory_cuts, -1, 1, -1},
+    {CUOPT_MIP_REDUCED_COST_STRENGTHENING, &mip_settings.reduced_cost_strengthening, -1, std::numeric_limits<i_t>::max(), -1},
     {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1},
     {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1},
     {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 1, 0},
diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu
index cf2180801d..f016753270 100644
--- a/cpp/src/mip/diversity/diversity_manager.cu
+++ b/cpp/src/mip/diversity/diversity_manager.cu
@@ -216,10 +216,11 @@ bool diversity_manager_t<i_t, f_t>::run_presolve(f_t time_limit)
   lp_dual_optimal_solution.resize(problem_ptr->n_constraints,
                                   problem_ptr->handle_ptr->get_stream());
   problem_ptr->handle_ptr->sync_stream();
-  CUOPT_LOG_INFO("After trivial presolve: %d constraints, %d variables, objective offset %f.",
+  CUOPT_LOG_INFO("After cuOpt presolve: %d constraints, %d variables, objective offset %f.",
                  problem_ptr->n_constraints,
                  problem_ptr->n_variables,
                  problem_ptr->presolve_data.objective_offset);
+  CUOPT_LOG_INFO("cuOpt presolve time: %.2f", stats.presolve_time);
   return true;
 }
 
diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu
index af992d2e5c..cb086e1456 100644
--- a/cpp/src/mip/diversity/lns/rins.cu
+++ b/cpp/src/mip/diversity/lns/rins.cu
@@ -22,6 +22,8 @@
 #include <mip/mip_constants.hpp>
 #include <mip/presolve/trivial_presolve.cuh>
 
+#include <dual_simplex/tic_toc.hpp>
+
 namespace cuopt::linear_programming::detail {
 template <typename i_t, typename f_t>
 rins_t<i_t, f_t>::rins_t(mip_solver_context_t<i_t, f_t>& context_,
@@ -260,6 +262,8 @@ void rins_t<i_t, f_t>::run_rins()
   branch_and_bound_settings.integer_tol     = context.settings.tolerances.integrality_tolerance;
   branch_and_bound_settings.num_threads     = 2;
   branch_and_bound_settings.num_bfs_workers = 1;
+  branch_and_bound_settings.max_cut_passes  = 0;
+  branch_and_bound_settings.sub_mip         = 1;
 
   // In the future, let RINS use all the diving heuristics. For now,
   // restricting to guided diving.
@@ -273,8 +277,8 @@ void rins_t<i_t, f_t>::run_rins()
                                                                        f_t objective) {
     rins_solution_queue.push_back(solution);
   };
-  dual_simplex::branch_and_bound_t<i_t, f_t> branch_and_bound(branch_and_bound_problem,
-                                                              branch_and_bound_settings);
+  dual_simplex::branch_and_bound_t<i_t, f_t> branch_and_bound(
+    branch_and_bound_problem, branch_and_bound_settings, dual_simplex::tic());
   branch_and_bound.set_initial_guess(cuopt::host_copy(fixed_assignment, rins_handle.get_stream()));
   branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution);
 
diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh
index 00cd0730da..e636e74714 100644
--- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh
+++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh
@@ -13,6 +13,7 @@
 #include <dual_simplex/branch_and_bound.hpp>
 #include <dual_simplex/simplex_solver_settings.hpp>
 #include <dual_simplex/solve.hpp>
+#include <dual_simplex/tic_toc.hpp>
 
 namespace cuopt::linear_programming::detail {
 
@@ -106,6 +107,8 @@ class sub_mip_recombiner_t : public recombiner_t<i_t, f_t> {
       branch_and_bound_settings.integer_tol     = context.settings.tolerances.integrality_tolerance;
       branch_and_bound_settings.num_threads     = 2;
       branch_and_bound_settings.num_bfs_workers = 1;
+      branch_and_bound_settings.max_cut_passes  = 0;
+      branch_and_bound_settings.sub_mip         = 1;
 
       // In the future, let SubMIP use all the diving heuristics. For now,
       // restricting to guided diving.
@@ -120,8 +123,8 @@ class sub_mip_recombiner_t : public recombiner_t<i_t, f_t> {
 
       // disable B&B logs, so that it is not interfering with the main B&B thread
       branch_and_bound_settings.log.log = false;
-      dual_simplex::branch_and_bound_t<i_t, f_t> branch_and_bound(branch_and_bound_problem,
-                                                                  branch_and_bound_settings);
+      dual_simplex::branch_and_bound_t<i_t, f_t> branch_and_bound(
+        branch_and_bound_problem, branch_and_bound_settings, dual_simplex::tic());
       branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution);
       if (solution_vector.size() > 0) {
         cuopt_assert(fixed_assignment.size() == branch_and_bound_solution.x.size(),
diff --git a/cpp/src/mip/presolve/third_party_presolve.cpp b/cpp/src/mip/presolve/third_party_presolve.cpp
index 9a212ebab0..b60747c57d 100644
--- a/cpp/src/mip/presolve/third_party_presolve.cpp
+++ b/cpp/src/mip/presolve/third_party_presolve.cpp
@@ -303,7 +303,7 @@ void check_presolve_status(const papilo::PresolveStatus& status)
 void check_postsolve_status(const papilo::PostsolveStatus& status)
 {
   switch (status) {
-    case papilo::PostsolveStatus::kOk: CUOPT_LOG_INFO("Post-solve status: succeeded"); break;
+    case papilo::PostsolveStatus::kOk: CUOPT_LOG_DEBUG("Post-solve status: succeeded"); break;
     case papilo::PostsolveStatus::kFailed:
       CUOPT_LOG_INFO(
         "Post-solve status: Post solved solution violates constraints. This is most likely due to "
@@ -409,7 +409,7 @@ std::optional<third_party_presolve_result_t<i_t, f_t>> third_party_presolve_t<i_
                  papilo_problem.getNCols(),
                  papilo_problem.getConstraintMatrix().getNnz());
 
-  CUOPT_LOG_INFO("Calling Papilo presolver");
+  CUOPT_LOG_INFO("Calling Papilo presolver (git hash %s)", PAPILO_GITHASH);
   if (category == problem_category_t::MIP) { dual_postsolve = false; }
   papilo::Presolve<f_t> presolver;
   set_presolve_methods<f_t>(presolver, category, dual_postsolve);
diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu
index 62ee0bb95e..ee852fb291 100644
--- a/cpp/src/mip/solve.cu
+++ b/cpp/src/mip/solve.cu
@@ -266,7 +266,7 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
         CUOPT_LOG_INFO("%d implied integers", presolve_result->implied_integer_indices.size());
       }
       if (problem.is_objective_integral()) { CUOPT_LOG_INFO("Objective function is integral"); }
-      CUOPT_LOG_INFO("Papilo presolve time: %f", presolve_time);
+      CUOPT_LOG_INFO("Papilo presolve time: %.2f", presolve_time);
     }
     if (settings.user_problem_file != "") {
       CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str());
diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu
index b88d39eaa7..2829babc97 100644
--- a/cpp/src/mip/solver.cu
+++ b/cpp/src/mip/solver.cu
@@ -106,8 +106,9 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
     context.problem_ptr->post_process_solution(sol);
     return sol;
   }
-  dm.timer              = timer_;
-  bool presolve_success = dm.run_presolve(timer_.remaining_time());
+  dm.timer                = timer_;
+  const bool run_presolve = context.settings.presolve;
+  bool presolve_success   = run_presolve ? dm.run_presolve(timer_.remaining_time()) : true;
   if (!presolve_success) {
     CUOPT_LOG_INFO("Problem proven infeasible in presolve");
     solution_t<i_t, f_t> sol(*context.problem_ptr);
@@ -115,7 +116,7 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
     context.problem_ptr->post_process_solution(sol);
     return sol;
   }
-  if (context.problem_ptr->empty) {
+  if (run_presolve && context.problem_ptr->empty) {
     CUOPT_LOG_INFO("Problem full reduced in presolve");
     solution_t<i_t, f_t> sol(*context.problem_ptr);
     sol.set_problem_fully_reduced();
@@ -130,7 +131,7 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
   }
 
   // if the problem was reduced to a LP: run concurrent LP
-  if (context.problem_ptr->n_integer_vars == 0) {
+  if (run_presolve && context.problem_ptr->n_integer_vars == 0) {
     CUOPT_LOG_INFO("Problem reduced to a LP, running concurrent LP");
     pdlp_solver_settings_t<i_t, f_t> settings{};
     settings.time_limit = timer_.remaining_time();
@@ -174,11 +175,23 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
     branch_and_bound_solution.resize(branch_and_bound_problem.num_cols);
 
     // Fill in the settings for branch and bound
-    branch_and_bound_settings.time_limit           = timer_.remaining_time();
+    branch_and_bound_settings.time_limit           = timer_.get_time_limit();
+    branch_and_bound_settings.node_limit           = context.settings.node_limit;
     branch_and_bound_settings.print_presolve_stats = false;
     branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap;
     branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap;
-    branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance;
+    branch_and_bound_settings.integer_tol    = context.settings.tolerances.integrality_tolerance;
+    branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes;
+    branch_and_bound_settings.mir_cuts       = context.settings.mir_cuts;
+    branch_and_bound_settings.mixed_integer_gomory_cuts =
+      context.settings.mixed_integer_gomory_cuts;
+    branch_and_bound_settings.knapsack_cuts = context.settings.knapsack_cuts;
+    branch_and_bound_settings.strong_chvatal_gomory_cuts =
+      context.settings.strong_chvatal_gomory_cuts;
+    branch_and_bound_settings.reduced_cost_strengthening =
+      context.settings.reduced_cost_strengthening;
+    branch_and_bound_settings.cut_change_threshold  = context.settings.cut_change_threshold;
+    branch_and_bound_settings.cut_min_orthogonality = context.settings.cut_min_orthogonality;
 
     if (context.settings.num_cpu_threads < 0) {
       branch_and_bound_settings.num_threads = omp_get_max_threads() - 1;
@@ -218,7 +231,7 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
 
     // Create the branch and bound object
     branch_and_bound = std::make_unique<dual_simplex::branch_and_bound_t<i_t, f_t>>(
-      branch_and_bound_problem, branch_and_bound_settings);
+      branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start());
     context.branch_and_bound_ptr = branch_and_bound.get();
     branch_and_bound->set_concurrent_lp_root_solve(true);
     auto* stats_ptr = &context.stats;
diff --git a/cpp/src/utilities/timer.hpp b/cpp/src/utilities/timer.hpp
index 1d1a4881e0..b7ab6a63bd 100644
--- a/cpp/src/utilities/timer.hpp
+++ b/cpp/src/utilities/timer.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -55,6 +55,35 @@ class timer_t {
 
   double get_time_limit() const noexcept { return time_limit; }
 
+  /**
+   * Returns the instant stored in `begin` as seconds since the system_clock
+   * epoch, held in a double.
+   *
+   * `begin` is a steady_clock time point, whose epoch is unspecified, so the
+   * result is an approximation: both clocks are sampled back to back and the
+   * offset `begin - now` is applied to the system_clock sample.
+   */
+  double get_tic_start() const noexcept
+  {
+    const auto sys_now    = std::chrono::system_clock::now();
+    const auto steady_now = std::chrono::steady_clock::now();
+
+    // The two clocks may tick at different resolutions, so convert the offset
+    // explicitly; an implicit conversion to a coarser system_clock duration
+    // would be lossy and therefore does not compile.
+    const auto offset =
+      std::chrono::duration_cast<std::chrono::system_clock::duration>(begin - steady_now);
+    const std::chrono::system_clock::time_point sys_begin = sys_now + offset;
+
+    // Keep microsecond granularity: whole seconds plus the sub-second part.
+    const auto us_since_epoch =
+      std::chrono::duration_cast<std::chrono::microseconds>(sys_begin.time_since_epoch());
+    const double whole_seconds = us_since_epoch.count() / 1000000;
+    const double micro_part    = us_since_epoch.count() % 1000000;
+
+    return whole_seconds + 1e-6 * micro_part;
+  }
+
  private:
   double time_limit;
   steady_clock::time_point begin;
diff --git a/cpp/tests/mip/CMakeLists.txt b/cpp/tests/mip/CMakeLists.txt
index ce47f31444..43fc273dbe 100644
--- a/cpp/tests/mip/CMakeLists.txt
+++ b/cpp/tests/mip/CMakeLists.txt
@@ -1,5 +1,5 @@
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
 
@@ -26,6 +26,9 @@ ConfigureTest(INCUMBENT_CALLBACK_TEST
 ConfigureTest(DOC_EXAMPLE_TEST
     ${CMAKE_CURRENT_SOURCE_DIR}/doc_example_test.cu
 )
+ConfigureTest(CUTS_TEST
+    ${CMAKE_CURRENT_SOURCE_DIR}/cuts_test.cu
+)
 ConfigureTest(UNIT_TEST
     ${CMAKE_CURRENT_SOURCE_DIR}/unit_test.cu
     ${CMAKE_CURRENT_SOURCE_DIR}/integer_with_real_bounds.cu
diff --git a/cpp/tests/mip/cuts_test.cu b/cpp/tests/mip/cuts_test.cu
new file mode 100644
index 0000000000..72b9acd47a
--- /dev/null
+++ b/cpp/tests/mip/cuts_test.cu
@@ -0,0 +1,168 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include "../linear_programming/utilities/pdlp_test_utilities.cuh"
+#include "mip_utils.cuh"
+
+#include <cuopt/linear_programming/solve.hpp>
+#include <mps_parser/parser.hpp>
+#include <utilities/common_utils.hpp>
+#include <utilities/error.hpp>
+
+#include <raft/core/handle.hpp>
+#include <raft/util/cudart_utils.hpp>
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <filesystem>
+#include <limits>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace cuopt::linear_programming::test {
+
+// Builds the 2-variable, 3-constraint integer program that test_cuts_1 solves
+mps_parser::mps_data_model_t<int, double> create_cuts_problem_1()
+{
+  // Create problem instance
+  mps_parser::mps_data_model_t<int, double> problem;
+
+  // Problem formulation (x1, x2 integer, 0 <= x1, x2 <= 10):
+  // minimize -7*x1 -2*x2
+  // subject to -1*x1 + 2*x2 <= 4
+  //            5*x1 + 1*x2 <= 20
+  //            -2*x1 -2*x2 <= -7
+
+  // Constraint matrix in CSR format: 3 rows, 2 nonzeros per row (columns x1, x2)
+  std::vector<int> offsets         = {0, 2, 4, 6};
+  std::vector<int> indices         = {0, 1, 0, 1, 0, 1};
+  std::vector<double> coefficients = {-1.0, 2.0, 5.0, 1.0, -2.0, -2.0};
+  problem.set_csr_constraint_matrix(coefficients.data(),
+                                    coefficients.size(),
+                                    indices.data(),
+                                    indices.size(),
+                                    offsets.data(),
+                                    offsets.size());
+
+  // Every row is a <= constraint: lower bound is -inf, upper bound is the right-hand side
+  std::vector<double> lower_bounds = {-std::numeric_limits<double>::infinity(),
+                                      -std::numeric_limits<double>::infinity(),
+                                      -std::numeric_limits<double>::infinity()};
+  std::vector<double> upper_bounds = {4.0, 20.0, -7.0};
+  problem.set_constraint_lower_bounds(lower_bounds.data(), lower_bounds.size());
+  problem.set_constraint_upper_bounds(upper_bounds.data(), upper_bounds.size());
+
+  // Variable bounds: 0 <= x1, x2 <= 10
+  std::vector<double> var_lower_bounds = {0.0, 0.0};
+  std::vector<double> var_upper_bounds = {10.0, 10.0};
+  problem.set_variable_lower_bounds(var_lower_bounds.data(), var_lower_bounds.size());
+  problem.set_variable_upper_bounds(var_upper_bounds.data(), var_upper_bounds.size());
+
+  // Set objective coefficients (minimize -7*x1 -2*x2)
+  std::vector<double> objective_coefficients = {-7.0, -2.0};
+  problem.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size());
+
+  // Mark both variables with type 'I' (integer)
+  std::vector<char> variable_types = {'I', 'I'};
+  problem.set_variable_types(variable_types);
+
+  return problem;
+}
+
+TEST(cuts, test_cuts_1)
+{
+  const raft::handle_t handle_{};
+  mip_solver_settings_t<int, double> settings;
+  constexpr double test_time_limit = 1.;
+
+  // Build the 2-variable integer program; it is solved below with max_cut_passes = 1
+  auto problem = create_cuts_problem_1();
+
+  settings.time_limit                  = test_time_limit;
+  settings.max_cut_passes              = 1;
+  mip_solution_t<int, double> solution = solve_mip(&handle_, problem, settings);
+  EXPECT_EQ(solution.get_termination_status(), mip_termination_status_t::Optimal);
+
+  double obj_val = solution.get_objective_value();
+  // The integer optimum is x1 = 4, x2 = 0, giving an objective of -7*4 - 2*0 = -28
+  EXPECT_NEAR(-28, obj_val, 1e-3);
+
+  EXPECT_EQ(solution.get_num_nodes(), 0);  // NOTE(review): presumably "no branching needed" - confirm
+}
+
+// Builds the 3-variable 0/1 program with two <= constraints that test_cuts_2 solves
+mps_parser::mps_data_model_t<int, double> create_cuts_problem_2()
+{
+  // Create problem instance
+  mps_parser::mps_data_model_t<int, double> problem;
+
+  // Problem formulation:
+  // minimize -86*y1 -4*y2 -40*y3
+  // subject to 774*y1 + 76*y2 + 42*y3 <= 875
+  //            67*y1 + 27*y2 + 53*y3 <= 875
+  //            y1, y2, y3 in {0, 1}
+
+  // Constraint matrix in CSR format: 2 rows, 3 nonzeros per row (columns y1, y2, y3)
+  std::vector<int> offsets         = {0, 3, 6};
+  std::vector<int> indices         = {0, 1, 2, 0, 1, 2};
+  std::vector<double> coefficients = {774.0, 76.0, 42.0, 67.0, 27.0, 53.0};
+  problem.set_csr_constraint_matrix(coefficients.data(),
+                                    coefficients.size(),
+                                    indices.data(),
+                                    indices.size(),
+                                    offsets.data(),
+                                    offsets.size());
+
+  // Both rows are <= constraints: lower bound is -inf, upper bound is the right-hand side
+  std::vector<double> lower_bounds = {-std::numeric_limits<double>::infinity(),
+                                      -std::numeric_limits<double>::infinity()};
+  std::vector<double> upper_bounds = {875.0, 875.0};
+  problem.set_constraint_lower_bounds(lower_bounds.data(), lower_bounds.size());
+  problem.set_constraint_upper_bounds(upper_bounds.data(), upper_bounds.size());
+
+  // Variable bounds: 0 <= y1, y2, y3 <= 1 (with the integer type below this yields {0, 1})
+  std::vector<double> var_lower_bounds = {0.0, 0.0, 0.0};
+  std::vector<double> var_upper_bounds = {1.0, 1.0, 1.0};
+  problem.set_variable_lower_bounds(var_lower_bounds.data(), var_lower_bounds.size());
+  problem.set_variable_upper_bounds(var_upper_bounds.data(), var_upper_bounds.size());
+
+  // Set objective coefficients (minimize -86*y1 -4*y2 -40*y3)
+  std::vector<double> objective_coefficients = {-86.0, -4.0, -40.0};
+  problem.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size());
+
+  // Mark all three variables with type 'I' (integer)
+  std::vector<char> variable_types = {'I', 'I', 'I'};
+  problem.set_variable_types(variable_types);
+
+  return problem;
+}
+
+TEST(cuts, test_cuts_2)
+{
+  const raft::handle_t handle_{};
+  mip_solver_settings_t<int, double> settings;
+  constexpr double test_time_limit = 1.;
+
+  // Build the 3-variable 0/1 program; solved below with presolve off and max_cut_passes = 10
+  auto problem = create_cuts_problem_2();
+
+  settings.time_limit                  = test_time_limit;
+  settings.max_cut_passes              = 10;
+  settings.presolve                    = false;
+  mip_solution_t<int, double> solution = solve_mip(&handle_, problem, settings);
+  EXPECT_EQ(solution.get_termination_status(), mip_termination_status_t::Optimal);
+
+  double obj_val = solution.get_objective_value();
+  // The optimum is y1 = 1, y2 = 0, y3 = 1 (y2 = 1 would violate row 1), objective -86 - 40 = -126
+  EXPECT_NEAR(-126, obj_val, 1e-3);
+
+  EXPECT_EQ(solution.get_num_nodes(), 0);  // NOTE(review): presumably "no branching needed" - confirm
+}
+
+}  // namespace cuopt::linear_programming::test
diff --git a/cpp/tests/mip/termination_test.cu b/cpp/tests/mip/termination_test.cu
index 849fded55e..3627dd7f40 100644
--- a/cpp/tests/mip/termination_test.cu
+++ b/cpp/tests/mip/termination_test.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -88,7 +88,7 @@ TEST(termination_status, optimality_test)
   auto [termination_status, obj_val, lb] =
     test_mps_file("mip/bb_optimality.mps", default_time_limit, false);
   EXPECT_EQ(termination_status, mip_termination_status_t::Optimal);
-  EXPECT_EQ(obj_val, 2);
+  EXPECT_NEAR(obj_val, 2, 1e-6);
 }
 
 // Ensure the lower bound on maximization problems when BB times out has the right sign
diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py
index 71befa512f..156c7ed69f 100644
--- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py
+++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py
@@ -395,9 +395,10 @@ def set_solution(
         x_val = sol["solution"][0]
         y_val = sol["solution"][1]
         cost = sol["cost"]
-        assert 2 * x_val + 4 * y_val >= 230
-        assert 3 * x_val + 2 * y_val <= 190
-        assert 5 * x_val + 3 * y_val == cost
+        tol = 1e-6
+        assert 2 * x_val + 4 * y_val >= 230 - tol
+        assert 3 * x_val + 2 * y_val <= 190 + tol
+        assert abs(5 * x_val + 3 * y_val - cost) < tol
 
 
 def test_incumbent_get_solutions():
diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml
index e2c1142523..4ddc9ca756 100644
--- a/python/libcuopt/pyproject.toml
+++ b/python/libcuopt/pyproject.toml
@@ -53,7 +53,6 @@ libcuopt = "libcuopt"
 select = [
     "distro-too-large-compressed",
 ]
-
 max_allowed_size_compressed = '660M'
 
 [project.scripts]