diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
index 79188cacc3..2dc95a23a1 100755
--- a/ci/validate_wheel.sh
+++ b/ci/validate_wheel.sh
@@ -22,11 +22,11 @@ PYDISTCHECK_ARGS=(
 if [[ "${package_dir}" == "python/libcuopt" ]]; then
     if [[ "${RAPIDS_CUDA_MAJOR}" == "12" ]]; then
         PYDISTCHECK_ARGS+=(
-            --max-allowed-size-compressed '650Mi'
+            --max-allowed-size-compressed '660Mi'
         )
     else
         PYDISTCHECK_ARGS+=(
-            --max-allowed-size-compressed '495Mi'
+            --max-allowed-size-compressed '520Mi'
         )
     fi
 elif [[ "${package_dir}" != "python/cuopt" ]] && \
diff --git a/cpp/src/branch_and_bound/CMakeLists.txt b/cpp/src/branch_and_bound/CMakeLists.txt
index 5bb1017120..1e40c1bbf1 100644
--- a/cpp/src/branch_and_bound/CMakeLists.txt
+++ b/cpp/src/branch_and_bound/CMakeLists.txt
@@ -5,7 +5,6 @@
 
 set(BRANCH_AND_BOUND_SRC_FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/mip_node.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/pseudo_costs.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/diving_heuristics.cpp
   )
diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp
index 33a2d983c9..8881565b6d 100644
--- a/cpp/src/branch_and_bound/branch_and_bound.cpp
+++ b/cpp/src/branch_and_bound/branch_and_bound.cpp
@@ -6,6 +6,7 @@
 /* clang-format on */
 
 #include <branch_and_bound/branch_and_bound.hpp>
+#include <branch_and_bound/diving_heuristics.hpp>
 #include <branch_and_bound/mip_node.hpp>
 #include <branch_and_bound/pseudo_costs.hpp>
 
@@ -35,15 +36,12 @@
 #include <deque>
 #include <future>
 #include <limits>
-#include <map>
 #include <optional>
 #include <string>
 #include <thread>
-#include <unordered_map>
 #include <vector>
 
 namespace cuopt::linear_programming::dual_simplex {
-
 namespace {
 
 template <typename f_t>
@@ -1039,7 +1037,9 @@ struct deterministic_bfs_policy_t
                                                 const std::vector<i_t>& fractional,
                                                 const std::vector<f_t>& x) override
   {
-    i_t var  = this->worker.pc_snapshot.variable_selection(fractional, x);
+    logger_t log;
+    log.log  = false;
+    i_t var  = this->worker.pc_snapshot.variable_selection(fractional, x, log);
     auto dir = martin_criteria(x[var], this->bnb.root_relax_soln_.x[var]);
     return {var, dir};
   }
@@ -1048,8 +1048,10 @@ struct deterministic_bfs_policy_t
                                  const std::vector<i_t>& fractional,
                                  const std::vector<f_t>& x) override
   {
+    logger_t log;
+    log.log = false;
     node->objective_estimate =
-      this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound);
+      this->worker.pc_snapshot.obj_estimate(fractional, x, node->lower_bound, log);
   }
 
   void on_node_completed(mip_node_t<i_t, f_t>* node,
@@ -1114,25 +1116,28 @@ struct deterministic_diving_policy_t
                                                 const std::vector<i_t>& fractional,
                                                 const std::vector<f_t>& x) override
   {
+    logger_t log;
+    log.log = false;
+
     switch (this->worker.diving_type) {
       case search_strategy_t::PSEUDOCOST_DIVING:
-        return this->worker.variable_selection_from_snapshot(fractional, x);
+        return pseudocost_diving(
+          this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log);
 
       case search_strategy_t::LINE_SEARCH_DIVING:
-        if (this->worker.root_solution) {
-          logger_t log;
-          log.log = false;
-          return line_search_diving<i_t, f_t>(fractional, x, *this->worker.root_solution, log);
-        }
-        return this->worker.variable_selection_from_snapshot(fractional, x);
+        return line_search_diving<i_t, f_t>(fractional, x, *this->worker.root_solution, log);
 
       case search_strategy_t::GUIDED_DIVING:
-        return this->worker.guided_variable_selection(fractional, x);
+        if (this->worker.incumbent_snapshot.empty()) {
+          return pseudocost_diving(
+            this->worker.pc_snapshot, fractional, x, *this->worker.root_solution, log);
+        } else {
+          return guided_diving(
+            this->worker.pc_snapshot, fractional, x, this->worker.incumbent_snapshot, log);
+        }
 
       case search_strategy_t::COEFFICIENT_DIVING: {
-        logger_t log;
-        log.log = false;
-        return coefficient_diving<i_t, f_t>(this->bnb.original_lp_,
+        return coefficient_diving<i_t, f_t>(this->worker.leaf_problem,
                                             fractional,
                                             x,
                                             this->bnb.var_up_locks_,
@@ -1140,7 +1145,7 @@ struct deterministic_diving_policy_t
                                             log);
       }
 
-      default: return this->worker.variable_selection_from_snapshot(fractional, x);
+      default: CUOPT_LOG_ERROR("Invalid diving method!"); return {-1, rounding_direction_t::NONE};
     }
   }
 
@@ -1188,6 +1193,9 @@ std::pair<node_status_t, rounding_direction_t> branch_and_bound_t<i_t, f_t>::upd
   node_status_t status                   = node_status_t::PENDING;
   rounding_direction_t round_dir         = rounding_direction_t::NONE;
 
+  worker->recompute_basis  = true;
+  worker->recompute_bounds = true;
+
   if (lp_status == dual::status_t::DUAL_UNBOUNDED) {
     node_ptr->lower_bound = inf;
     policy.graphviz(search_tree, node_ptr, "infeasible", 0.0);
@@ -1247,6 +1255,8 @@ std::pair<node_status_t, rounding_direction_t> branch_and_bound_t<i_t, f_t>::upd
       assert(dir != rounding_direction_t::NONE);
 
       policy.update_objective_estimate(node_ptr, leaf_fractional, leaf_solution.x);
+      worker->recompute_basis  = false;
+      worker->recompute_bounds = false;
 
       logger_t log;
       log.log = false;
@@ -2506,7 +2516,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_);
 
   pc_.resize(original_lp_.num_cols);
-  original_lp_.A.transpose(pc_.AT);
+  original_lp_.A.transpose(*pc_.AT);
   {
     raft::common::nvtx::range scope_sb("BB::strong_branching");
     strong_branching<i_t, f_t>(original_lp_,
@@ -3321,11 +3331,12 @@ template <typename PoolT>
 void branch_and_bound_t<i_t, f_t>::deterministic_broadcast_snapshots(
   PoolT& pool, const std::vector<f_t>& incumbent_snapshot)
 {
-  deterministic_snapshot_t<i_t, f_t> snap;
-  snap.upper_bound    = upper_bound_.load();
-  snap.total_lp_iters = exploration_stats_.total_lp_iters.load();
-  snap.incumbent      = incumbent_snapshot;
-  snap.pc_snapshot    = pc_.create_snapshot();
+  deterministic_snapshot_t<i_t, f_t> snap{
+    .upper_bound    = upper_bound_,
+    .pc_snapshot    = pc_,
+    .incumbent      = incumbent_snapshot,
+    .total_lp_iters = exploration_stats_.total_lp_iters,
+  };
 
   for (auto& worker : pool) {
     worker.set_snapshots(snap);
diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp
index f2917ba930..6a9269bb2f 100644
--- a/cpp/src/branch_and_bound/branch_and_bound.hpp
+++ b/cpp/src/branch_and_bound/branch_and_bound.hpp
@@ -8,12 +8,12 @@
 #pragma once
 
 #include <branch_and_bound/bb_event.hpp>
-#include <branch_and_bound/branch_and_bound_worker.hpp>
 #include <branch_and_bound/deterministic_workers.hpp>
-#include <branch_and_bound/diving_heuristics.hpp>
 #include <branch_and_bound/mip_node.hpp>
 #include <branch_and_bound/node_queue.hpp>
 #include <branch_and_bound/pseudo_costs.hpp>
+#include <branch_and_bound/worker.hpp>
+#include <branch_and_bound/worker_pool.hpp>
 
 #include <cuts/cuts.hpp>
 
@@ -107,7 +107,7 @@ class branch_and_bound_t {
     }
   }
 
-  // Set a solution based on the user problem during the course of the solve
+  // Set a solution based on the user problem during solve time
   void set_new_solution(const std::vector<f_t>& solution);
 
   // This queues the solution to be processed at the correct work unit timestamp
diff --git a/cpp/src/branch_and_bound/constants.hpp b/cpp/src/branch_and_bound/constants.hpp
new file mode 100644
index 0000000000..ab8677095c
--- /dev/null
+++ b/cpp/src/branch_and_bound/constants.hpp
@@ -0,0 +1,31 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+namespace cuopt::linear_programming::dual_simplex {
+
+constexpr int num_search_strategies = 5;
+
+// Indicate the search and variable selection algorithms used by each thread
+// in B&B (See [1]).
+//
+// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin,
+// Berlin, 2007. doi: 10.14279/depositonce-1634.
+enum search_strategy_t : int {
+  BEST_FIRST         = 0,  // Best-First + Plunging.
+  PSEUDOCOST_DIVING  = 1,  // Pseudocost diving (9.2.5)
+  LINE_SEARCH_DIVING = 2,  // Line search diving (9.2.4)
+  GUIDED_DIVING      = 3,  // Guided diving (9.2.3).
+  COEFFICIENT_DIVING = 4   // Coefficient diving (9.2.1)
+};
+
+enum class rounding_direction_t { NONE = -1, DOWN = 0, UP = 1 };
+
+enum class branch_and_bound_mode_t { PARALLEL = 0, DETERMINISTIC = 1 };
+
+}  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/branch_and_bound/deterministic_workers.hpp b/cpp/src/branch_and_bound/deterministic_workers.hpp
index 7a074051c6..a5c3769126 100644
--- a/cpp/src/branch_and_bound/deterministic_workers.hpp
+++ b/cpp/src/branch_and_bound/deterministic_workers.hpp
@@ -8,9 +8,9 @@
 #pragma once
 
 #include <branch_and_bound/bb_event.hpp>
-#include <branch_and_bound/branch_and_bound_worker.hpp>
 #include <branch_and_bound/diving_heuristics.hpp>
 #include <branch_and_bound/node_queue.hpp>
+#include <branch_and_bound/worker.hpp>
 
 #include <utilities/work_limit_context.hpp>
 
@@ -58,7 +58,7 @@ struct deterministic_snapshot_t {
   f_t upper_bound;
   pseudo_cost_snapshot_t<i_t, f_t> pc_snapshot;
   std::vector<f_t> incumbent;
-  i_t total_lp_iters;
+  int64_t total_lp_iters;
 };
 
 template <typename i_t, typename f_t, typename Derived>
@@ -74,7 +74,7 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t<i_t, f_t> {
 
   // Diving-specific snapshots (ignored by BFS workers)
   std::vector<f_t> incumbent_snapshot;
-  i_t total_lp_iters_snapshot{0};
+  int64_t total_lp_iters_snapshot{0};
 
   std::vector<queued_integer_solution_t<i_t, f_t>> integer_solutions;
   int next_solution_seq{0};
@@ -90,7 +90,7 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t<i_t, f_t> {
                               const std::vector<variable_type_t>& var_types,
                               const simplex_solver_settings_t<i_t, f_t>& settings,
                               const std::string& context_name)
-    : base_t(id, original_lp, Arow, var_types, settings), work_context(context_name)
+    : base_t(id, original_lp, Arow, var_types, settings), work_context(context_name), pc_snapshot(1)
   {
     work_context.deterministic = true;
   }
@@ -342,22 +342,6 @@ class deterministic_diving_worker_t
       {objective, solution, depth, this->worker_id, this->next_solution_seq++});
     ++this->total_integer_solutions;
   }
-
-  branch_variable_t<i_t> variable_selection_from_snapshot(const std::vector<i_t>& fractional,
-                                                          const std::vector<f_t>& solution) const
-  {
-    assert(root_solution != nullptr);
-    return this->pc_snapshot.pseudocost_diving(fractional, solution, *root_solution);
-  }
-
-  branch_variable_t<i_t> guided_variable_selection(const std::vector<i_t>& fractional,
-                                                   const std::vector<f_t>& solution) const
-  {
-    if (this->incumbent_snapshot.empty()) {
-      return variable_selection_from_snapshot(fractional, solution);
-    }
-    return this->pc_snapshot.guided_diving(fractional, solution, this->incumbent_snapshot);
-  }
 };
 
 template <typename i_t, typename f_t, typename WorkerT, typename Derived>
diff --git a/cpp/src/branch_and_bound/diving_heuristics.cpp b/cpp/src/branch_and_bound/diving_heuristics.cpp
index f9791280a6..571027c1d7 100644
--- a/cpp/src/branch_and_bound/diving_heuristics.cpp
+++ b/cpp/src/branch_and_bound/diving_heuristics.cpp
@@ -65,38 +65,117 @@ branch_variable_t<i_t> line_search_diving(const std::vector<i_t>& fractional,
   return {branch_var, round_dir};
 }
 
-template <typename i_t, typename f_t>
-branch_variable_t<i_t> pseudocost_diving(pseudo_costs_t<i_t, f_t>& pc,
+template <typename i_t, typename f_t, branch_and_bound_mode_t Mode>
+branch_variable_t<i_t> pseudocost_diving(pseudo_costs_t<i_t, f_t, Mode>& pc,
                                          const std::vector<i_t>& fractional,
                                          const std::vector<f_t>& solution,
                                          const std::vector<f_t>& root_solution,
                                          logger_t& log)
 {
-  return pseudocost_diving_from_arrays(pc.pseudo_cost_sum_down.data(),
-                                       pc.pseudo_cost_sum_up.data(),
-                                       pc.pseudo_cost_num_down.data(),
-                                       pc.pseudo_cost_num_up.data(),
-                                       (i_t)pc.pseudo_cost_sum_down.size(),
-                                       fractional,
-                                       solution,
-                                       root_solution);
+  const i_t num_fractional = fractional.size();
+  if (num_fractional == 0) return {-1, rounding_direction_t::NONE};
+
+  pseudo_cost_averages_t<i_t, f_t> avgs = pc.compute_averages();
+
+  i_t branch_var                 = fractional[0];
+  f_t max_score                  = std::numeric_limits<f_t>::lowest();
+  rounding_direction_t round_dir = rounding_direction_t::DOWN;
+  constexpr f_t eps              = f_t(1e-6);
+
+  for (i_t j : fractional) {
+    f_t f_down  = solution[j] - std::floor(solution[j]);
+    f_t f_up    = std::ceil(solution[j]) - solution[j];
+    f_t pc_down = pc.pseudo_cost_num_down[j] != 0
+                    ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j]
+                    : avgs.down_avg;
+    f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j]
+                                              : avgs.up_avg;
+
+    f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down);
+    f_t score_up   = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up);
+
+    f_t score                = 0;
+    rounding_direction_t dir = rounding_direction_t::DOWN;
+
+    f_t root_val = (j < static_cast<i_t>(root_solution.size())) ? root_solution[j] : solution[j];
+
+    if (solution[j] < root_val - f_t(0.4)) {
+      score = score_down;
+      dir   = rounding_direction_t::DOWN;
+    } else if (solution[j] > root_val + f_t(0.4)) {
+      score = score_up;
+      dir   = rounding_direction_t::UP;
+    } else if (f_down < f_t(0.3)) {
+      score = score_down;
+      dir   = rounding_direction_t::DOWN;
+    } else if (f_down > f_t(0.7)) {
+      score = score_up;
+      dir   = rounding_direction_t::UP;
+    } else if (pc_down < pc_up + eps) {
+      score = score_down;
+      dir   = rounding_direction_t::DOWN;
+    } else {
+      score = score_up;
+      dir   = rounding_direction_t::UP;
+    }
+
+    if (score > max_score) {
+      max_score  = score;
+      branch_var = j;
+      round_dir  = dir;
+    }
+  }
+
+  if (round_dir == rounding_direction_t::NONE) {
+    branch_var = fractional[0];
+    round_dir  = rounding_direction_t::DOWN;
+  }
+
+  return {branch_var, round_dir};
 }
 
-template <typename i_t, typename f_t>
-branch_variable_t<i_t> guided_diving(pseudo_costs_t<i_t, f_t>& pc,
+template <typename i_t, typename f_t, branch_and_bound_mode_t Mode>
+branch_variable_t<i_t> guided_diving(pseudo_costs_t<i_t, f_t, Mode>& pc,
                                      const std::vector<i_t>& fractional,
                                      const std::vector<f_t>& solution,
                                      const std::vector<f_t>& incumbent,
                                      logger_t& log)
 {
-  return guided_diving_from_arrays(pc.pseudo_cost_sum_down.data(),
-                                   pc.pseudo_cost_sum_up.data(),
-                                   pc.pseudo_cost_num_down.data(),
-                                   pc.pseudo_cost_num_up.data(),
-                                   (i_t)pc.pseudo_cost_sum_down.size(),
-                                   fractional,
-                                   solution,
-                                   incumbent);
+  const i_t num_fractional = fractional.size();
+  if (num_fractional == 0) return {-1, rounding_direction_t::NONE};
+
+  pseudo_cost_averages_t<i_t, f_t> avgs = pc.compute_averages();
+
+  i_t branch_var                 = fractional[0];
+  f_t max_score                  = std::numeric_limits<f_t>::lowest();
+  rounding_direction_t round_dir = rounding_direction_t::DOWN;
+  constexpr f_t eps              = f_t(1e-6);
+
+  for (i_t j : fractional) {
+    f_t f_down    = solution[j] - std::floor(solution[j]);
+    f_t f_up      = std::ceil(solution[j]) - solution[j];
+    f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j]));
+    f_t up_dist   = std::abs(std::ceil(solution[j]) - incumbent[j]);
+    rounding_direction_t dir =
+      down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP;
+
+    f_t pc_down = pc.pseudo_cost_num_down[j] != 0
+                    ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j]
+                    : avgs.down_avg;
+    f_t pc_up  = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j]
+                                               : avgs.up_avg;
+    f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up;
+    f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down;
+    f_t score  = (score1 + score2) / 6;
+
+    if (score > max_score) {
+      max_score  = score;
+      branch_var = j;
+      round_dir  = dir;
+    }
+  }
+
+  return {branch_var, round_dir};
 }
 
 template <typename i_t, typename f_t>
@@ -187,12 +266,26 @@ template branch_variable_t<int> pseudocost_diving(pseudo_costs_t<int, double>& p
                                                   const std::vector<double>& root_solution,
                                                   logger_t& log);
 
+template branch_variable_t<int> pseudocost_diving(
+  pseudo_costs_t<int, double, branch_and_bound_mode_t::DETERMINISTIC>& pc,
+  const std::vector<int>& fractional,
+  const std::vector<double>& solution,
+  const std::vector<double>& root_solution,
+  logger_t& log);
+
 template branch_variable_t<int> guided_diving(pseudo_costs_t<int, double>& pc,
                                               const std::vector<int>& fractional,
                                               const std::vector<double>& solution,
                                               const std::vector<double>& incumbent,
                                               logger_t& log);
 
+template branch_variable_t<int> guided_diving(
+  pseudo_costs_t<int, double, branch_and_bound_mode_t::DETERMINISTIC>& pc,
+  const std::vector<int>& fractional,
+  const std::vector<double>& solution,
+  const std::vector<double>& incumbent,
+  logger_t& log);
+
 template void calculate_variable_locks(const lp_problem_t<int, double>& lp_problem,
                                        std::vector<int>& up_locks,
                                        std::vector<int>& down_locks);
diff --git a/cpp/src/branch_and_bound/diving_heuristics.hpp b/cpp/src/branch_and_bound/diving_heuristics.hpp
index dfeabe3a5f..325aa0b878 100644
--- a/cpp/src/branch_and_bound/diving_heuristics.hpp
+++ b/cpp/src/branch_and_bound/diving_heuristics.hpp
@@ -22,15 +22,15 @@ branch_variable_t<i_t> line_search_diving(const std::vector<i_t>& fractional,
                                           const std::vector<f_t>& root_solution,
                                           logger_t& log);
 
-template <typename i_t, typename f_t>
-branch_variable_t<i_t> pseudocost_diving(pseudo_costs_t<i_t, f_t>& pc,
+template <typename i_t, typename f_t, branch_and_bound_mode_t Mode>
+branch_variable_t<i_t> pseudocost_diving(pseudo_costs_t<i_t, f_t, Mode>& pc,
                                          const std::vector<i_t>& fractional,
                                          const std::vector<f_t>& solution,
                                          const std::vector<f_t>& root_solution,
                                          logger_t& log);
 
-template <typename i_t, typename f_t>
-branch_variable_t<i_t> guided_diving(pseudo_costs_t<i_t, f_t>& pc,
+template <typename i_t, typename f_t, branch_and_bound_mode_t Mode>
+branch_variable_t<i_t> guided_diving(pseudo_costs_t<i_t, f_t, Mode>& pc,
                                      const std::vector<i_t>& fractional,
                                      const std::vector<f_t>& solution,
                                      const std::vector<f_t>& incumbent,
diff --git a/cpp/src/branch_and_bound/mip_node.cpp b/cpp/src/branch_and_bound/mip_node.cpp
deleted file mode 100644
index 7b0f644f4e..0000000000
--- a/cpp/src/branch_and_bound/mip_node.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-/* clang-format off */
-/*
- * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- */
-/* clang-format on */
-
-#include <branch_and_bound/mip_node.hpp>
-
-namespace cuopt::linear_programming::dual_simplex {
-
-bool inactive_status(node_status_t status)
-{
-  return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE ||
-          status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL);
-}
-
-}  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp
index a24f67c3bc..cce23c3bd7 100644
--- a/cpp/src/branch_and_bound/mip_node.hpp
+++ b/cpp/src/branch_and_bound/mip_node.hpp
@@ -7,6 +7,8 @@
 
 #pragma once
 
+#include <branch_and_bound/constants.hpp>
+
 #include <dual_simplex/initial_basis.hpp>
 #include <dual_simplex/types.hpp>
 
@@ -29,9 +31,11 @@ enum class node_status_t : int {
   NUMERICAL        = 5   // Encountered numerical issue when solving the LP relaxation
 };
 
-enum class rounding_direction_t : int8_t { NONE = -1, DOWN = 0, UP = 1 };
-
-bool inactive_status(node_status_t status);
+inline bool inactive_status(node_status_t status)
+{
+  return (status == node_status_t::FATHOMED || status == node_status_t::INTEGER_FEASIBLE ||
+          status == node_status_t::INFEASIBLE || status == node_status_t::NUMERICAL);
+}
 
 template <typename i_t, typename f_t>
 class mip_node_t {
diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp
index c38e98e27d..cf67a69046 100644
--- a/cpp/src/branch_and_bound/pseudo_costs.cpp
+++ b/cpp/src/branch_and_bound/pseudo_costs.cpp
@@ -218,8 +218,10 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t<i_t, f_t>& lp,
                                            const std::vector<i_t>& basic_list,
                                            const std::vector<i_t>& nonbasic_list,
                                            const std::vector<i_t>& fractional,
+                                           const csc_matrix_t<i_t, f_t>& AT,
                                            basis_update_mpf_t<i_t, f_t>& basis_factors,
-                                           pseudo_costs_t<i_t, f_t>& pc)
+                                           std::vector<f_t>& strong_branch_down,
+                                           std::vector<f_t>& strong_branch_up)
 {
   i_t m = lp.num_rows;
   i_t n = lp.num_cols;
@@ -246,7 +248,7 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t<i_t, f_t>& lp,
     objective_change_estimate_t<f_t> estimate =
       single_pivot_objective_change_estimate(lp,
                                              settings,
-                                             pc.AT,
+                                             AT,
                                              vstatus,
                                              j,
                                              basic_map[j],
@@ -258,8 +260,8 @@ void initialize_pseudo_costs_with_estimate(const lp_problem_t<i_t, f_t>& lp,
                                              workspace,
                                              delta_z,
                                              work_estimate);
-    pc.strong_branch_down[k] = estimate.down_obj_change;
-    pc.strong_branch_up[k]   = estimate.up_obj_change;
+    strong_branch_down[k] = estimate.down_obj_change;
+    strong_branch_up[k]   = estimate.up_obj_change;
   }
 }
 
@@ -298,12 +300,14 @@ void strong_branch_helper(i_t start,
                           f_t root_obj,
                           f_t upper_bound,
                           i_t iter_limit,
-                          pseudo_costs_t<i_t, f_t>& pc,
+                          std::vector<f_t>& strong_branch_down,
+                          std::vector<f_t>& strong_branch_up,
                           std::vector<f_t>& dual_simplex_obj_down,
                           std::vector<f_t>& dual_simplex_obj_up,
                           std::vector<dual::status_t>& dual_simplex_status_down,
                           std::vector<dual::status_t>& dual_simplex_status_up,
-                          shared_strong_branching_context_view_t<i_t, f_t>& sb_view)
+                          shared_strong_branching_context_view_t<i_t, f_t>& sb_view,
+                          omp_atomic_t<i_t>& num_strong_branches_completed)
 {
   raft::common::nvtx::range scope("BB::strong_branch_helper");
   lp_problem_t child_problem = original_lp;
@@ -380,7 +384,7 @@ void strong_branch_helper(i_t start,
       }
 
       if (branch == 0) {
-        pc.strong_branch_down[k]    = std::max(obj - root_obj, 0.0);
+        strong_branch_down[k]       = std::max(obj - root_obj, 0.0);
         dual_simplex_obj_down[k]    = std::max(obj - root_obj, 0.0);
         dual_simplex_status_down[k] = status;
         if (verbose) {
@@ -393,7 +397,7 @@ void strong_branch_helper(i_t start,
                               toc(start_time));
         }
       } else {
-        pc.strong_branch_up[k]    = std::max(obj - root_obj, 0.0);
+        strong_branch_up[k]       = std::max(obj - root_obj, 0.0);
         dual_simplex_obj_up[k]    = std::max(obj - root_obj, 0.0);
         dual_simplex_status_up[k] = status;
         if (verbose) {
@@ -431,7 +435,7 @@ void strong_branch_helper(i_t start,
     }
     if (toc(start_time) > settings.time_limit) { break; }
 
-    const i_t completed = pc.num_strong_branches_completed++;
+    const i_t completed = num_strong_branches_completed++;
 
     if (thread_id == 0 && toc(last_log) > 10) {
       last_log = tic();
@@ -463,7 +467,7 @@ std::pair<f_t, dual::status_t> trial_branching(const lp_problem_t<i_t, f_t>& ori
                                                f_t upper_bound,
                                                f_t start_time,
                                                i_t iter_limit,
-                                               omp_atomic_t<int64_t>& total_lp_iter)
+                                               i_t& iter)
 {
   lp_problem_t child_problem      = original_lp;
   child_problem.lower[branch_var] = branch_var_lower;
@@ -479,7 +483,7 @@ std::pair<f_t, dual::status_t> trial_branching(const lp_problem_t<i_t, f_t>& ori
     objective_upper_bound(child_problem, upper_bound, child_settings.dual_tol);
 
   lp_solution_t<i_t, f_t> solution(original_lp.num_rows, original_lp.num_cols);
-  i_t iter                                         = 0;
+  iter                                             = 0;
   std::vector<variable_status_t> child_vstatus     = vstatus;
   std::vector<f_t> child_edge_norms                = edge_norms;
   std::vector<i_t> child_basic_list                = basic_list;
@@ -502,7 +506,7 @@ std::pair<f_t, dual::status_t> trial_branching(const lp_problem_t<i_t, f_t>& ori
                                                           solution,
                                                           iter,
                                                           child_edge_norms);
-  total_lp_iter += iter;
+
   settings.log.debug("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %s. Obj %e\n",
                      branch_var,
                      child_problem.lower[branch_var],
@@ -732,9 +736,9 @@ static void batch_pdlp_strong_branching_task(
     std::max(static_cast<f_t>(0.0), settings.time_limit - batch_elapsed_time);
   if (warm_start_remaining_time <= 0.0) { return; }
 
-  assert(!pc.pdlp_warm_cache.populated && "PDLP warm cache should not be populated at this point");
+  assert(!pc.pdlp_warm_cache->populated && "PDLP warm cache should not be populated at this point");
 
-  if (!pc.pdlp_warm_cache.populated) {
+  if (!pc.pdlp_warm_cache->populated) {
     pdlp_solver_settings_t<i_t, f_t> ws_settings;
     ws_settings.method               = method_t::PDLP;
     ws_settings.presolver            = presolver_t::None;
@@ -756,14 +760,14 @@ static void batch_pdlp_strong_branching_task(
     ws_settings.inside_mip                           = true;
     if (effective_batch_pdlp == 1) { ws_settings.concurrent_halt = &concurrent_halt; }
 
-    auto start_time = std::chrono::high_resolution_clock::now();
+    auto lp_start_time = std::chrono::high_resolution_clock::now();
 
-    auto ws_solution = solve_lp(&pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, ws_settings);
+    auto ws_solution = solve_lp(&pc.pdlp_warm_cache->batch_pdlp_handle, mps_model, ws_settings);
 
     if (verbose) {
       auto end_time = std::chrono::high_resolution_clock::now();
       auto duration =
-        std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count();
+        std::chrono::duration_cast<std::chrono::milliseconds>(end_time - lp_start_time).count();
       settings.log.printf(
         "Original problem solved in %d milliseconds"
         " and iterations: %d\n",
@@ -777,21 +781,21 @@ static void batch_pdlp_strong_branching_task(
       const auto& ws_dual   = ws_solution.get_dual_solution();
       // Need to use the pc steam since the batch pdlp handle will get destroyed after the warm
       // start
-      cache.initial_primal = rmm::device_uvector<f_t>(ws_primal, ws_primal.stream());
-      cache.initial_dual   = rmm::device_uvector<f_t>(ws_dual, ws_dual.stream());
-      cache.step_size      = ws_solution.get_pdlp_warm_start_data().initial_step_size_;
-      cache.primal_weight  = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_;
-      cache.pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_;
-      cache.populated      = true;
+      cache->initial_primal = rmm::device_uvector<f_t>(ws_primal, ws_primal.stream());
+      cache->initial_dual   = rmm::device_uvector<f_t>(ws_dual, ws_dual.stream());
+      cache->step_size      = ws_solution.get_pdlp_warm_start_data().initial_step_size_;
+      cache->primal_weight  = ws_solution.get_pdlp_warm_start_data().initial_primal_weight_;
+      cache->pdlp_iteration = ws_solution.get_pdlp_warm_start_data().total_pdlp_iterations_;
+      cache->populated      = true;
 
       if (verbose) {
         settings.log.printf(
           "Cached PDLP warm start: primal=%zu dual=%zu step_size=%e primal_weight=%e iters=%d\n",
-          cache.initial_primal.size(),
-          cache.initial_dual.size(),
-          cache.step_size,
-          cache.primal_weight,
-          cache.pdlp_iteration);
+          cache->initial_primal.size(),
+          cache->initial_dual.size(),
+          cache->step_size,
+          cache->primal_weight,
+          cache->pdlp_iteration);
       }
     } else {
       if (verbose) {
@@ -817,22 +821,23 @@ static void batch_pdlp_strong_branching_task(
   if (batch_remaining_time <= 0.0) { return; }
   pdlp_settings.time_limit = batch_remaining_time;
 
-  if (pc.pdlp_warm_cache.populated) {
+  if (pc.pdlp_warm_cache->populated) {
     auto& cache = pc.pdlp_warm_cache;
-    pdlp_settings.set_initial_primal_solution(cache.initial_primal.data(),
-                                              cache.initial_primal.size(),
-                                              cache.batch_pdlp_handle.get_stream());
-    pdlp_settings.set_initial_dual_solution(
-      cache.initial_dual.data(), cache.initial_dual.size(), cache.batch_pdlp_handle.get_stream());
-    pdlp_settings.set_initial_step_size(cache.step_size);
-    pdlp_settings.set_initial_primal_weight(cache.primal_weight);
-    pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration);
+    pdlp_settings.set_initial_primal_solution(cache->initial_primal.data(),
+                                              cache->initial_primal.size(),
+                                              cache->batch_pdlp_handle.get_stream());
+    pdlp_settings.set_initial_dual_solution(cache->initial_dual.data(),
+                                            cache->initial_dual.size(),
+                                            cache->batch_pdlp_handle.get_stream());
+    pdlp_settings.set_initial_step_size(cache->step_size);
+    pdlp_settings.set_initial_primal_weight(cache->primal_weight);
+    pdlp_settings.set_initial_pdlp_iteration(cache->pdlp_iteration);
   }
 
   if (concurrent_halt.load() == 1) { return; }
 
   const auto solutions = batch_pdlp_solve(
-    &pc.pdlp_warm_cache.batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings);
+    &pc.pdlp_warm_cache->batch_pdlp_handle, mps_model, fractional, fraction_values, pdlp_settings);
   f_t batch_pdlp_strong_branching_time = toc(start_batch);
 
   // Fail safe in case the batch PDLP failed and produced no solutions
@@ -888,7 +893,7 @@ static void batch_pdlp_reliability_branching_task(
   const std::vector<i_t>& candidate_vars,
   const simplex_solver_settings_t<i_t, f_t>& settings,
   shared_strong_branching_context_view_t<i_t, f_t>& sb_view,
-  batch_pdlp_warm_cache_t<i_t, f_t>& pdlp_warm_cache,
+  batch_pdlp_warm_cache_t<i_t, f_t>* pdlp_warm_cache,
   std::vector<f_t>& pdlp_obj_down,
   std::vector<f_t>& pdlp_obj_up)
 {
@@ -935,15 +940,16 @@ static void batch_pdlp_reliability_branching_task(
   }
   pdlp_settings.time_limit = batch_remaining_time;
 
-  if (pdlp_warm_cache.populated) {
-    auto& cache = pdlp_warm_cache;
-    pdlp_settings.set_initial_primal_solution(
-      cache.initial_primal.data(), cache.initial_primal.size(), batch_pdlp_handle.get_stream());
-    pdlp_settings.set_initial_dual_solution(
-      cache.initial_dual.data(), cache.initial_dual.size(), batch_pdlp_handle.get_stream());
-    pdlp_settings.set_initial_step_size(cache.step_size);
-    pdlp_settings.set_initial_primal_weight(cache.primal_weight);
-    pdlp_settings.set_initial_pdlp_iteration(cache.pdlp_iteration);
+  if (pdlp_warm_cache->populated) {
+    pdlp_settings.set_initial_primal_solution(pdlp_warm_cache->initial_primal.data(),
+                                              pdlp_warm_cache->initial_primal.size(),
+                                              batch_pdlp_handle.get_stream());
+    pdlp_settings.set_initial_dual_solution(pdlp_warm_cache->initial_dual.data(),
+                                            pdlp_warm_cache->initial_dual.size(),
+                                            batch_pdlp_handle.get_stream());
+    pdlp_settings.set_initial_step_size(pdlp_warm_cache->step_size);
+    pdlp_settings.set_initial_primal_weight(pdlp_warm_cache->primal_weight);
+    pdlp_settings.set_initial_pdlp_iteration(pdlp_warm_cache->pdlp_iteration);
   }
 
   if (concurrent_halt.load() == 1) { return; }
@@ -1002,9 +1008,9 @@ void strong_branching(const lp_problem_t<i_t, f_t>& original_lp,
   constexpr bool verbose = false;
 
   pc.resize(original_lp.num_cols);
-  pc.strong_branch_down.assign(fractional.size(), 0);
-  pc.strong_branch_up.assign(fractional.size(), 0);
-  pc.num_strong_branches_completed = 0;
+  std::vector<f_t> strong_branch_down(fractional.size(), std::numeric_limits<f_t>::quiet_NaN());
+  std::vector<f_t> strong_branch_up(fractional.size(), std::numeric_limits<f_t>::quiet_NaN());
+  omp_atomic_t<i_t> num_strong_branches_completed = 0;
 
   const f_t elapsed_time = toc(start_time);
   if (elapsed_time > settings.time_limit) { return; }
@@ -1049,8 +1055,10 @@ void strong_branching(const lp_problem_t<i_t, f_t>& original_lp,
                                           basic_list,
                                           nonbasic_list,
                                           fractional,
+                                          *pc.AT,
                                           basis_factors,
-                                          pc);
+                                          strong_branch_down,
+                                          strong_branch_up);
   } else {
 #pragma omp parallel num_threads(settings.num_threads)
     {
@@ -1082,7 +1090,6 @@ void strong_branching(const lp_problem_t<i_t, f_t>& original_lp,
             i_t start = std::floor(k * fractional.size() / n);
             i_t end   = std::floor((k + 1) * fractional.size() / n);
 
-            constexpr bool verbose = false;
             if (verbose) {
               settings.log.printf("Thread id %d task id %d start %d end %d. size %d\n",
                                   omp_get_thread_num(),
@@ -1105,12 +1112,14 @@ void strong_branching(const lp_problem_t<i_t, f_t>& original_lp,
                                  root_obj,
                                  upper_bound,
                                  simplex_iteration_limit,
-                                 pc,
+                                 strong_branch_down,
+                                 strong_branch_up,
                                  dual_simplex_obj_down,
                                  dual_simplex_obj_up,
                                  dual_simplex_status_down,
                                  dual_simplex_status_up,
-                                 sb_view);
+                                 sb_view,
+                                 num_strong_branches_completed);
           }
           // DS done: signal PDLP to stop (time-limit or all work done) and wait
           if (effective_batch_pdlp == 1) { concurrent_halt.store(1); }
@@ -1178,7 +1187,7 @@ void strong_branching(const lp_problem_t<i_t, f_t>& original_lp,
     for (i_t k = 0; k < fractional.size(); k++) {
       for (i_t branch = 0; branch < 2; branch++) {
         const bool is_down = (branch == 0);
-        f_t& sb_dest       = is_down ? pc.strong_branch_down[k] : pc.strong_branch_up[k];
+        f_t& sb_dest       = is_down ? strong_branch_down[k] : strong_branch_up[k];
         f_t ds_obj         = is_down ? dual_simplex_obj_down[k] : dual_simplex_obj_up[k];
         dual::status_t ds_status =
           is_down ? dual_simplex_status_down[k] : dual_simplex_status_up[k];
@@ -1211,12 +1220,12 @@ void strong_branching(const lp_problem_t<i_t, f_t>& original_lp,
       }
     }
 
-    pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root =
+    pc.pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root =
       (f_t(merged_from_pdlp) / f_t(fractional.size() * 2)) * 100.0;
     if (verbose) {
       settings.log.printf(
         "Batch PDLP for strong branching. Percent solved by batch PDLP at root: %f\n",
-        pc.pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root);
+        pc.pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root);
       settings.log.printf(
         "Merged results: %d from DS, %d from PDLP, %d unresolved (NaN), %d solved by both\n",
         merged_from_ds,
@@ -1226,28 +1235,29 @@ void strong_branching(const lp_problem_t<i_t, f_t>& original_lp,
     }
   }
 
-  pc.update_pseudo_costs_from_strong_branching(fractional, root_solution.x);
+  pc.update_pseudo_costs_from_strong_branching(
+    fractional, strong_branch_down, strong_branch_up, root_solution.x);
 }
 
-template <typename i_t, typename f_t>
-f_t pseudo_costs_t<i_t, f_t>::calculate_pseudocost_score(i_t j,
-                                                         const std::vector<f_t>& solution,
-                                                         f_t pseudo_cost_up_avg,
-                                                         f_t pseudo_cost_down_avg) const
+template <typename i_t, typename f_t, branch_and_bound_mode_t BnBMode>
+f_t pseudo_costs_t<i_t, f_t, BnBMode>::calculate_pseudocost_score(
+  i_t j, const std::vector<f_t>& solution, pseudo_cost_averages_t<i_t, f_t> averages) const
 {
   constexpr f_t eps = 1e-6;
   i_t num_up        = pseudo_cost_num_up[j];
+  f_t sum_up        = pseudo_cost_sum_up[j];
   i_t num_down      = pseudo_cost_num_down[j];
-  f_t pc_up         = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg;
-  f_t pc_down       = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg;
+  f_t sum_down      = pseudo_cost_sum_down[j];
+  f_t pc_up         = num_up > 0 ? sum_up / num_up : averages.up_avg;
+  f_t pc_down       = num_down > 0 ? sum_down / num_down : averages.down_avg;
   f_t f_down        = solution[j] - std::floor(solution[j]);
   f_t f_up          = std::ceil(solution[j]) - solution[j];
   return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps);
 }
 
-template <typename i_t, typename f_t>
-void pseudo_costs_t<i_t, f_t>::update_pseudo_costs(mip_node_t<i_t, f_t>* node_ptr,
-                                                   f_t leaf_objective)
+template <typename i_t, typename f_t, branch_and_bound_mode_t BnBMode>
+void pseudo_costs_t<i_t, f_t, BnBMode>::update_pseudo_costs(mip_node_t<i_t, f_t>* node_ptr,
+                                                            f_t leaf_objective)
 {
   const f_t change_in_obj = std::max(leaf_objective - node_ptr->lower_bound, 0.0);
   const f_t frac          = node_ptr->branch_dir == rounding_direction_t::DOWN
@@ -1263,43 +1273,51 @@ void pseudo_costs_t<i_t, f_t>::update_pseudo_costs(mip_node_t<i_t, f_t>* node_pt
   }
 }
 
-template <typename i_t, typename f_t>
-void pseudo_costs_t<i_t, f_t>::initialized(i_t& num_initialized_down,
-                                           i_t& num_initialized_up,
-                                           f_t& pseudo_cost_down_avg,
-                                           f_t& pseudo_cost_up_avg) const
+template <typename i_t, typename f_t, branch_and_bound_mode_t BnBMode>
+pseudo_cost_averages_t<i_t, f_t> pseudo_costs_t<i_t, f_t, BnBMode>::compute_averages() const
 {
-  auto avgs            = compute_pseudo_cost_averages(pseudo_cost_sum_down.data(),
-                                           pseudo_cost_sum_up.data(),
-                                           pseudo_cost_num_down.data(),
-                                           pseudo_cost_num_up.data(),
-                                           pseudo_cost_sum_down.size());
-  pseudo_cost_down_avg = avgs.down_avg;
-  pseudo_cost_up_avg   = avgs.up_avg;
+  i_t num_initialized_down = 0;
+  i_t num_initialized_up   = 0;
+  f_t pseudo_cost_down_avg = 0.0;
+  f_t pseudo_cost_up_avg   = 0.0;
+
+  for (size_t j = 0; j < pseudo_cost_sum_down.size(); ++j) {
+    if (pseudo_cost_num_down[j] > 0 && std::isfinite(pseudo_cost_sum_down[j])) {
+      ++num_initialized_down;
+      pseudo_cost_down_avg += pseudo_cost_sum_down[j] / pseudo_cost_num_down[j];
+    }
+
+    if (pseudo_cost_num_up[j] > 0 && std::isfinite(pseudo_cost_sum_up[j])) {
+      ++num_initialized_up;
+      pseudo_cost_up_avg += pseudo_cost_sum_up[j] / pseudo_cost_num_up[j];
+    }
+  }
+
+  pseudo_cost_averages_t<i_t, f_t> averages{
+    .down_avg      = (num_initialized_down > 0) ? pseudo_cost_down_avg / num_initialized_down : 1.0,
+    .num_init_down = num_initialized_down,
+    .up_avg        = (num_initialized_up > 0) ? pseudo_cost_up_avg / num_initialized_up : 1.0,
+    .num_init_up   = num_initialized_up};
+  return averages;
 }
 
-template <typename i_t, typename f_t>
-i_t pseudo_costs_t<i_t, f_t>::variable_selection(const std::vector<i_t>& fractional,
-                                                 const std::vector<f_t>& solution,
-                                                 logger_t& log)
+template <typename i_t, typename f_t, branch_and_bound_mode_t BnBMode>
+i_t pseudo_costs_t<i_t, f_t, BnBMode>::variable_selection(const std::vector<i_t>& fractional,
+                                                          const std::vector<f_t>& solution,
+                                                          logger_t& log)
 {
-  i_t branch_var = fractional[0];
-  f_t max_score  = -1;
-  i_t num_initialized_down;
-  i_t num_initialized_up;
-  f_t pseudo_cost_down_avg;
-  f_t pseudo_cost_up_avg;
-
-  initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg);
+  i_t branch_var                            = fractional[0];
+  f_t max_score                             = -1;
+  pseudo_cost_averages_t<i_t, f_t> averages = compute_averages();
 
   log.printf("PC: num initialized down %d up %d avg down %e up %e\n",
-             num_initialized_down,
-             num_initialized_up,
-             pseudo_cost_down_avg,
-             pseudo_cost_up_avg);
+             averages.num_init_down,
+             averages.num_init_up,
+             averages.down_avg,
+             averages.up_avg);
 
   for (i_t j : fractional) {
-    f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg);
+    f_t score = calculate_pseudocost_score(j, solution, averages);
 
     if (score > max_score) {
       max_score  = score;
@@ -1315,8 +1333,8 @@ i_t pseudo_costs_t<i_t, f_t>::variable_selection(const std::vector<i_t>& fractio
   return branch_var;
 }
 
-template <typename i_t, typename f_t>
-i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
+template <typename i_t, typename f_t, branch_and_bound_mode_t BnBMode>
+i_t pseudo_costs_t<i_t, f_t, BnBMode>::reliable_variable_selection(
   const mip_node_t<i_t, f_t>* node_ptr,
   const std::vector<i_t>& fractional,
   branch_and_bound_worker_t<i_t, f_t>* worker,
@@ -1329,12 +1347,11 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
   const std::vector<i_t>& new_slacks,
   const lp_problem_t<i_t, f_t>& original_lp)
 {
-  constexpr f_t eps                      = 1e-6;
-  f_t start_time                         = bnb_stats.start_time;
-  i_t branch_var                         = fractional[0];
-  f_t max_score                          = -1;
-  f_t pseudo_cost_down_avg               = -1;
-  f_t pseudo_cost_up_avg                 = -1;
+  constexpr f_t eps = 1e-6;
+  f_t start_time    = bnb_stats.start_time;
+  i_t branch_var    = fractional[0];
+  f_t max_score     = -1;
+  pseudo_cost_averages_t<i_t, f_t> averages;
   lp_solution_t<i_t, f_t>& leaf_solution = worker->leaf_solution;
 
   const int64_t branch_and_bound_lp_iters = bnb_stats.total_lp_iters;
@@ -1367,14 +1384,12 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
   // In the latter, we are not using the average pseudocost (which calculated in the `initialized`
   // method).
   if (reliable_threshold == 0) {
-    i_t num_initialized_up;
-    i_t num_initialized_down;
-    initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg);
+    averages = compute_averages();
     log.printf("PC: num initialized down %d up %d avg down %e up %e\n",
-               num_initialized_down,
-               num_initialized_up,
-               pseudo_cost_down_avg,
-               pseudo_cost_up_avg);
+               averages.num_init_down,
+               averages.num_init_up,
+               averages.down_avg,
+               averages.up_avg);
   }
 
   std::vector<std::pair<f_t, i_t>> unreliable_list;
@@ -1386,8 +1401,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
       unreliable_list.push_back(std::make_pair(-1, j));
       continue;
     }
-    f_t score =
-      calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg);
+    f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages);
 
     if (score > max_score) {
       max_score  = score;
@@ -1416,12 +1430,12 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
   constexpr f_t min_percent_solved_by_batch_pdlp_at_root_for_pdlp = 5.0;
   // Batch PDLP is either forced or we use the heuristic to decide if it should be used
   const bool use_pdlp = (rb_mode == 2) || (rb_mode != 0 && !settings.sub_mip &&
-                                           !settings.deterministic && pdlp_warm_cache.populated &&
+                                           !settings.deterministic && pdlp_warm_cache->populated &&
                                            unreliable_list.size() > min_num_candidates_for_pdlp &&
-                                           pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root >
+                                           pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root >
                                              min_percent_solved_by_batch_pdlp_at_root_for_pdlp);
 
-  if (rb_mode != 0 && !pdlp_warm_cache.populated) {
+  if (rb_mode != 0 && !pdlp_warm_cache->populated) {
     log.printf("PDLP warm start data not populated, using DS only\n");
   } else if (rb_mode != 0 && settings.sub_mip) {
     log.printf("Batch PDLP reliability branching is disabled because sub-MIP is enabled\n");
@@ -1430,7 +1444,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
       "Batch PDLP reliability branching is disabled because deterministic mode is enabled\n");
   } else if (rb_mode != 0 && unreliable_list.size() < min_num_candidates_for_pdlp) {
     log.printf("Not enough candidates to use batch PDLP, using DS only\n");
-  } else if (rb_mode != 0 && pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root < 5.0) {
+  } else if (rb_mode != 0 && pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root < 5.0) {
     log.printf("Percent solved by batch PDLP at root is too low, using DS only\n");
   } else if (use_pdlp) {
     log.printf(
@@ -1438,7 +1452,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
       "by batch PDLP at root is %f%% (> %f%%)\n",
       static_cast<i_t>(unreliable_list.size()),
       min_num_candidates_for_pdlp,
-      pdlp_warm_cache.percent_solved_by_batch_pdlp_at_root,
+      pdlp_warm_cache->percent_solved_by_batch_pdlp_at_root,
       min_percent_solved_by_batch_pdlp_at_root_for_pdlp);
   }
 
@@ -1456,7 +1470,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
 
   log.printf(
     "RB iters = %d, B&B iters = %d, unreliable = %d, num_tasks = %d, reliable_threshold = %d\n",
-    strong_branching_lp_iter.load(),
+    static_cast<int64_t>(strong_branching_lp_iter),
     branch_and_bound_lp_iters,
     unreliable_list.size(),
     num_tasks,
@@ -1487,7 +1501,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
           objective_change_estimate_t<f_t> estimate =
             single_pivot_objective_change_estimate(worker->leaf_problem,
                                                    settings,
-                                                   AT,
+                                                   *AT,
                                                    node_ptr->vstatus,
                                                    j,
                                                    basic_map[j],
@@ -1503,8 +1517,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
           score = std::max(estimate.up_obj_change, eps) * std::max(estimate.down_obj_change, eps);
         } else {
           // Use the previous score, even if it is unreliable
-          score = calculate_pseudocost_score(
-            j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg);
+          score = calculate_pseudocost_score(j, leaf_solution.x, averages);
         }
       }
     } else {
@@ -1554,7 +1567,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
                                           candidate_vars,
                                           settings,
                                           sb_view,
-                                          pdlp_warm_cache,
+                                          pdlp_warm_cache.get(),
                                           pdlp_obj_down,
                                           pdlp_obj_up);
   }
@@ -1596,6 +1609,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
         pseudo_cost_mutex_down[j].lock();
         if (pseudo_cost_num_down[j] < reliable_threshold) {
           // Do trial branching on the down branch
+          i_t iter                 = 0;
           const auto [obj, status] = trial_branching(worker->leaf_problem,
                                                      settings,
                                                      var_types,
@@ -1610,7 +1624,8 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
                                                      upper_bound,
                                                      start_time,
                                                      iter_limit_per_trial,
-                                                     strong_branching_lp_iter);
+                                                     iter);
+          strong_branching_lp_iter += iter;
 
           dual_simplex_obj_down[i]    = obj;
           dual_simplex_status_down[i] = status;
@@ -1639,6 +1654,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
       } else {
         pseudo_cost_mutex_up[j].lock();
         if (pseudo_cost_num_up[j] < reliable_threshold) {
+          i_t iter                 = 0;
           const auto [obj, status] = trial_branching(worker->leaf_problem,
                                                      settings,
                                                      var_types,
@@ -1653,7 +1669,8 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
                                                      upper_bound,
                                                      start_time,
                                                      iter_limit_per_trial,
-                                                     strong_branching_lp_iter);
+                                                     iter);
+          strong_branching_lp_iter += iter;
 
           dual_simplex_obj_up[i]    = obj;
           dual_simplex_status_up[i] = status;
@@ -1674,9 +1691,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
 
       if (toc(start_time) > settings.time_limit) { continue; }
 
-      score =
-        calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg);
-
+      score = calculate_pseudocost_score(j, leaf_solution.x, averages);
       score_mutex.lock();
       if (score > max_score) {
         max_score  = score;
@@ -1756,8 +1771,7 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
         }
       }
 
-      f_t score =
-        calculate_pseudocost_score(j, leaf_solution.x, pseudo_cost_up_avg, pseudo_cost_down_avg);
+      f_t score = calculate_pseudocost_score(j, leaf_solution.x, averages);
       if (score > max_score) {
         max_score  = score;
         branch_var = j;
@@ -1777,28 +1791,23 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(
   return branch_var;
 }
 
-template <typename i_t, typename f_t>
-f_t pseudo_costs_t<i_t, f_t>::obj_estimate(const std::vector<i_t>& fractional,
-                                           const std::vector<f_t>& solution,
-                                           f_t lower_bound,
-                                           logger_t& log)
+template <typename i_t, typename f_t, branch_and_bound_mode_t BnBMode>
+f_t pseudo_costs_t<i_t, f_t, BnBMode>::obj_estimate(const std::vector<i_t>& fractional,
+                                                    const std::vector<f_t>& solution,
+                                                    f_t lower_bound,
+                                                    logger_t& log)
 {
   const i_t num_fractional = fractional.size();
   f_t estimate             = lower_bound;
 
-  i_t num_initialized_down;
-  i_t num_initialized_up;
-  f_t pseudo_cost_down_avg;
-  f_t pseudo_cost_up_avg;
-
-  initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg);
+  pseudo_cost_averages_t<i_t, f_t> averages = compute_averages();
 
   for (i_t j : fractional) {
     constexpr f_t eps = 1e-6;
     i_t num_up        = pseudo_cost_num_up[j];
     i_t num_down      = pseudo_cost_num_down[j];
-    f_t pc_up         = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg;
-    f_t pc_down       = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg;
+    f_t pc_up         = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : averages.up_avg;
+    f_t pc_down       = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : averages.down_avg;
     f_t f_down        = solution[j] - std::floor(solution[j]);
     f_t f_up          = std::ceil(solution[j]) - solution[j];
     estimate += std::min(pc_down * f_down, pc_up * f_up);
@@ -1808,9 +1817,12 @@ f_t pseudo_costs_t<i_t, f_t>::obj_estimate(const std::vector<i_t>& fractional,
   return estimate;
 }
 
-template <typename i_t, typename f_t>
-void pseudo_costs_t<i_t, f_t>::update_pseudo_costs_from_strong_branching(
-  const std::vector<i_t>& fractional, const std::vector<f_t>& root_soln)
+template <typename i_t, typename f_t, branch_and_bound_mode_t BnBMode>
+void pseudo_costs_t<i_t, f_t, BnBMode>::update_pseudo_costs_from_strong_branching(
+  const std::vector<i_t>& fractional,
+  const std::vector<f_t>& strong_branch_down,
+  const std::vector<f_t>& strong_branch_up,
+  const std::vector<f_t>& root_soln)
 {
   for (i_t k = 0; k < fractional.size(); k++) {
     const i_t j = fractional[k];
@@ -1834,7 +1846,9 @@ void pseudo_costs_t<i_t, f_t>::update_pseudo_costs_from_strong_branching(
 
 #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
 
-template class pseudo_costs_t<int, double>;
+template class pseudo_costs_t<int, double, branch_and_bound_mode_t::PARALLEL>;
+template class pseudo_costs_t<int, double, branch_and_bound_mode_t::DETERMINISTIC>;
+template class pseudo_cost_snapshot_t<int, double>;
 
 template void strong_branching<int, double>(const lp_problem_t<int, double>& original_lp,
                                             const simplex_solver_settings_t<int, double>& settings,
diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp
index 009bd8b81a..5db0d573a2 100644
--- a/cpp/src/branch_and_bound/pseudo_costs.hpp
+++ b/cpp/src/branch_and_bound/pseudo_costs.hpp
@@ -7,8 +7,9 @@
 
 #pragma once
 
-#include <branch_and_bound/branch_and_bound_worker.hpp>
+#include <branch_and_bound/constants.hpp>
 #include <branch_and_bound/mip_node.hpp>
+#include <branch_and_bound/worker.hpp>
 
 #include <dual_simplex/basis_updates.hpp>
 #include <dual_simplex/logger.hpp>
@@ -27,354 +28,6 @@
 
 namespace cuopt::linear_programming::dual_simplex {
 
-template <typename i_t>
-struct branch_variable_t {
-  i_t variable;
-  rounding_direction_t direction;
-};
-
-template <typename i_t, typename f_t>
-struct pseudo_cost_update_t {
-  i_t variable;
-  rounding_direction_t direction;
-  f_t delta;
-  double work_timestamp;
-  int worker_id;
-
-  bool operator<(const pseudo_cost_update_t& other) const
-  {
-    if (work_timestamp != other.work_timestamp) return work_timestamp < other.work_timestamp;
-    if (variable != other.variable) return variable < other.variable;
-    if (delta != other.delta) return delta < other.delta;
-    return worker_id < other.worker_id;
-  }
-};
-
-template <typename f_t>
-struct pseudo_cost_averages_t {
-  f_t down_avg;
-  f_t up_avg;
-};
-
-// used to get T from omp_atomic_t<T> based on the fact that omp_atomic_t<T>::operator++ returns T
-template <typename T>
-using underlying_type = decltype(std::declval<T&>()++);
-
-// Necessary because omp_atomic_t<f_t> may be passed instead of f_t
-template <typename MaybeWrappedI, typename MaybeWrappedF>
-auto compute_pseudo_cost_averages(const MaybeWrappedF* pc_sum_down,
-                                  const MaybeWrappedF* pc_sum_up,
-                                  const MaybeWrappedI* pc_num_down,
-                                  const MaybeWrappedI* pc_num_up,
-                                  size_t n)
-{
-  using underlying_f_t = underlying_type<MaybeWrappedF>;
-  using underlying_i_t = underlying_type<MaybeWrappedI>;
-
-  underlying_i_t num_initialized_down = 0;
-  underlying_i_t num_initialized_up   = 0;
-  underlying_f_t pseudo_cost_down_avg = 0.0;
-  underlying_f_t pseudo_cost_up_avg   = 0.0;
-
-  for (size_t j = 0; j < n; ++j) {
-    if (pc_num_down[j] > 0) {
-      ++num_initialized_down;
-      if (std::isfinite(pc_sum_down[j])) {
-        pseudo_cost_down_avg += pc_sum_down[j] / pc_num_down[j];
-      }
-    }
-    if (pc_num_up[j] > 0) {
-      ++num_initialized_up;
-      if (std::isfinite(pc_sum_up[j])) { pseudo_cost_up_avg += pc_sum_up[j] / pc_num_up[j]; }
-    }
-  }
-
-  pseudo_cost_down_avg =
-    (num_initialized_down > 0) ? pseudo_cost_down_avg / num_initialized_down : 1.0;
-  pseudo_cost_up_avg = (num_initialized_up > 0) ? pseudo_cost_up_avg / num_initialized_up : 1.0;
-
-  return pseudo_cost_averages_t<underlying_f_t>{pseudo_cost_down_avg, pseudo_cost_up_avg};
-}
-
-// Variable selection using pseudo-cost product scoring
-// Returns the best variable to branch on
-template <typename i_t, typename f_t>
-i_t variable_selection_from_pseudo_costs(const f_t* pc_sum_down,
-                                         const f_t* pc_sum_up,
-                                         const i_t* pc_num_down,
-                                         const i_t* pc_num_up,
-                                         i_t n_vars,
-                                         const std::vector<i_t>& fractional,
-                                         const std::vector<f_t>& solution)
-{
-  const i_t num_fractional = fractional.size();
-  if (num_fractional == 0) return -1;
-
-  auto [pc_down_avg, pc_up_avg] =
-    compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars);
-
-  i_t branch_var    = fractional[0];
-  f_t max_score     = std::numeric_limits<f_t>::lowest();
-  constexpr f_t eps = f_t(1e-6);
-
-  for (i_t j : fractional) {
-    f_t pc_down      = pc_num_down[j] != 0 ? pc_sum_down[j] / pc_num_down[j] : pc_down_avg;
-    f_t pc_up        = pc_num_up[j] != 0 ? pc_sum_up[j] / pc_num_up[j] : pc_up_avg;
-    const f_t f_down = solution[j] - std::floor(solution[j]);
-    const f_t f_up   = std::ceil(solution[j]) - solution[j];
-    f_t score        = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps);
-    if (score > max_score) {
-      max_score  = score;
-      branch_var = j;
-    }
-  }
-
-  return branch_var;
-}
-
-// Objective estimate using pseudo-costs (lock-free implementation)
-// Returns lower_bound + estimated cost to reach integer feasibility
-template <typename i_t, typename f_t>
-f_t obj_estimate_from_arrays(const f_t* pc_sum_down,
-                             const f_t* pc_sum_up,
-                             const i_t* pc_num_down,
-                             const i_t* pc_num_up,
-                             i_t n_vars,
-                             const std::vector<i_t>& fractional,
-                             const std::vector<f_t>& solution,
-                             f_t lower_bound)
-{
-  auto [pc_down_avg, pc_up_avg] =
-    compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars);
-
-  f_t estimate      = lower_bound;
-  constexpr f_t eps = f_t(1e-6);
-
-  for (i_t j : fractional) {
-    f_t pc_down      = pc_num_down[j] != 0 ? pc_sum_down[j] / pc_num_down[j] : pc_down_avg;
-    f_t pc_up        = pc_num_up[j] != 0 ? pc_sum_up[j] / pc_num_up[j] : pc_up_avg;
-    const f_t f_down = solution[j] - std::floor(solution[j]);
-    const f_t f_up   = std::ceil(solution[j]) - solution[j];
-    estimate += std::min(std::max(pc_down * f_down, eps), std::max(pc_up * f_up, eps));
-  }
-
-  return estimate;
-}
-
-template <typename i_t, typename f_t, typename MaybeWrappedI = i_t, typename MaybeWrappedF = f_t>
-branch_variable_t<i_t> pseudocost_diving_from_arrays(const MaybeWrappedF* pc_sum_down,
-                                                     const MaybeWrappedF* pc_sum_up,
-                                                     const MaybeWrappedI* pc_num_down,
-                                                     const MaybeWrappedI* pc_num_up,
-                                                     i_t n_vars,
-                                                     const std::vector<i_t>& fractional,
-                                                     const std::vector<f_t>& solution,
-                                                     const std::vector<f_t>& root_solution)
-{
-  const i_t num_fractional = fractional.size();
-  if (num_fractional == 0) return {-1, rounding_direction_t::NONE};
-
-  auto avgs = compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars);
-
-  i_t branch_var                 = fractional[0];
-  f_t max_score                  = std::numeric_limits<f_t>::lowest();
-  rounding_direction_t round_dir = rounding_direction_t::DOWN;
-  constexpr f_t eps              = f_t(1e-6);
-
-  for (i_t j : fractional) {
-    f_t f_down  = solution[j] - std::floor(solution[j]);
-    f_t f_up    = std::ceil(solution[j]) - solution[j];
-    f_t pc_down = pc_num_down[j] != 0 ? (f_t)pc_sum_down[j] / (f_t)pc_num_down[j] : avgs.down_avg;
-    f_t pc_up   = pc_num_up[j] != 0 ? (f_t)pc_sum_up[j] / (f_t)pc_num_up[j] : avgs.up_avg;
-
-    f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down);
-    f_t score_up   = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up);
-
-    f_t score                = 0;
-    rounding_direction_t dir = rounding_direction_t::DOWN;
-
-    f_t root_val = (j < static_cast<i_t>(root_solution.size())) ? root_solution[j] : solution[j];
-
-    if (solution[j] < root_val - f_t(0.4)) {
-      score = score_down;
-      dir   = rounding_direction_t::DOWN;
-    } else if (solution[j] > root_val + f_t(0.4)) {
-      score = score_up;
-      dir   = rounding_direction_t::UP;
-    } else if (f_down < f_t(0.3)) {
-      score = score_down;
-      dir   = rounding_direction_t::DOWN;
-    } else if (f_down > f_t(0.7)) {
-      score = score_up;
-      dir   = rounding_direction_t::UP;
-    } else if (pc_down < pc_up + eps) {
-      score = score_down;
-      dir   = rounding_direction_t::DOWN;
-    } else {
-      score = score_up;
-      dir   = rounding_direction_t::UP;
-    }
-
-    if (score > max_score) {
-      max_score  = score;
-      branch_var = j;
-      round_dir  = dir;
-    }
-  }
-
-  if (round_dir == rounding_direction_t::NONE) {
-    branch_var = fractional[0];
-    round_dir  = rounding_direction_t::DOWN;
-  }
-
-  return {branch_var, round_dir};
-}
-
-template <typename i_t, typename f_t, typename MaybeWrappedI = i_t, typename MaybeWrappedF = f_t>
-branch_variable_t<i_t> guided_diving_from_arrays(const MaybeWrappedF* pc_sum_down,
-                                                 const MaybeWrappedF* pc_sum_up,
-                                                 const MaybeWrappedI* pc_num_down,
-                                                 const MaybeWrappedI* pc_num_up,
-                                                 i_t n_vars,
-                                                 const std::vector<i_t>& fractional,
-                                                 const std::vector<f_t>& solution,
-                                                 const std::vector<f_t>& incumbent)
-{
-  const i_t num_fractional = fractional.size();
-  if (num_fractional == 0) return {-1, rounding_direction_t::NONE};
-
-  auto avgs = compute_pseudo_cost_averages(pc_sum_down, pc_sum_up, pc_num_down, pc_num_up, n_vars);
-
-  i_t branch_var                 = fractional[0];
-  f_t max_score                  = std::numeric_limits<f_t>::lowest();
-  rounding_direction_t round_dir = rounding_direction_t::DOWN;
-  constexpr f_t eps              = f_t(1e-6);
-
-  for (i_t j : fractional) {
-    f_t f_down    = solution[j] - std::floor(solution[j]);
-    f_t f_up      = std::ceil(solution[j]) - solution[j];
-    f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j]));
-    f_t up_dist   = std::abs(std::ceil(solution[j]) - incumbent[j]);
-    rounding_direction_t dir =
-      down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP;
-
-    f_t pc_down = pc_num_down[j] != 0 ? (f_t)pc_sum_down[j] / (f_t)pc_num_down[j] : avgs.down_avg;
-    f_t pc_up   = pc_num_up[j] != 0 ? (f_t)pc_sum_up[j] / (f_t)pc_num_up[j] : avgs.up_avg;
-
-    f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up;
-    f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down;
-    f_t score  = (score1 + score2) / 6;
-
-    if (score > max_score) {
-      max_score  = score;
-      branch_var = j;
-      round_dir  = dir;
-    }
-  }
-
-  return {branch_var, round_dir};
-}
-
-template <typename i_t, typename f_t>
-class pseudo_cost_snapshot_t {
- public:
-  pseudo_cost_snapshot_t() = default;
-
-  pseudo_cost_snapshot_t(std::vector<f_t> sum_down,
-                         std::vector<f_t> sum_up,
-                         std::vector<i_t> num_down,
-                         std::vector<i_t> num_up)
-    : sum_down_(std::move(sum_down)),
-      sum_up_(std::move(sum_up)),
-      num_down_(std::move(num_down)),
-      num_up_(std::move(num_up))
-  {
-  }
-
-  i_t variable_selection(const std::vector<i_t>& fractional, const std::vector<f_t>& solution) const
-  {
-    return variable_selection_from_pseudo_costs(sum_down_.data(),
-                                                sum_up_.data(),
-                                                num_down_.data(),
-                                                num_up_.data(),
-                                                n_vars(),
-                                                fractional,
-                                                solution);
-  }
-
-  f_t obj_estimate(const std::vector<i_t>& fractional,
-                   const std::vector<f_t>& solution,
-                   f_t lower_bound) const
-  {
-    return obj_estimate_from_arrays(sum_down_.data(),
-                                    sum_up_.data(),
-                                    num_down_.data(),
-                                    num_up_.data(),
-                                    n_vars(),
-                                    fractional,
-                                    solution,
-                                    lower_bound);
-  }
-
-  branch_variable_t<i_t> pseudocost_diving(const std::vector<i_t>& fractional,
-                                           const std::vector<f_t>& solution,
-                                           const std::vector<f_t>& root_solution) const
-  {
-    return pseudocost_diving_from_arrays(sum_down_.data(),
-                                         sum_up_.data(),
-                                         num_down_.data(),
-                                         num_up_.data(),
-                                         n_vars(),
-                                         fractional,
-                                         solution,
-                                         root_solution);
-  }
-
-  branch_variable_t<i_t> guided_diving(const std::vector<i_t>& fractional,
-                                       const std::vector<f_t>& solution,
-                                       const std::vector<f_t>& incumbent) const
-  {
-    return guided_diving_from_arrays(sum_down_.data(),
-                                     sum_up_.data(),
-                                     num_down_.data(),
-                                     num_up_.data(),
-                                     n_vars(),
-                                     fractional,
-                                     solution,
-                                     incumbent);
-  }
-
-  void queue_update(
-    i_t variable, rounding_direction_t direction, f_t delta, double clock, int worker_id)
-  {
-    updates_.push_back({variable, direction, delta, clock, worker_id});
-    if (direction == rounding_direction_t::DOWN) {
-      sum_down_[variable] += delta;
-      num_down_[variable]++;
-    } else {
-      sum_up_[variable] += delta;
-      num_up_[variable]++;
-    }
-  }
-
-  std::vector<pseudo_cost_update_t<i_t, f_t>> take_updates()
-  {
-    std::vector<pseudo_cost_update_t<i_t, f_t>> result;
-    result.swap(updates_);
-    return result;
-  }
-
-  i_t n_vars() const { return (i_t)sum_down_.size(); }
-
-  std::vector<f_t> sum_down_;
-  std::vector<f_t> sum_up_;
-  std::vector<i_t> num_down_;
-  std::vector<i_t> num_up_;
-
- private:
-  std::vector<pseudo_cost_update_t<i_t, f_t>> updates_;
-};
-
 template <typename i_t, typename f_t>
 struct reliability_branching_settings_t {
   // Lower bound for the maximum number of LP iterations for a single trial branching
@@ -413,6 +66,12 @@ struct reliability_branching_settings_t {
   bool rank_candidates_with_dual_pivot = true;
 };
 
+template <typename i_t>
+struct branch_variable_t {
+  i_t variable;
+  rounding_direction_t direction;
+};
+
 template <typename i_t, typename f_t>
 struct batch_pdlp_warm_cache_t {
   const raft::handle_t batch_pdlp_handle{};
@@ -426,8 +85,58 @@ struct batch_pdlp_warm_cache_t {
 };
 
 template <typename i_t, typename f_t>
+struct pseudo_cost_averages_t {
+  f_t down_avg;
+  i_t num_init_down;
+  f_t up_avg;
+  i_t num_init_up;
+};
+
+template <typename i_t, typename f_t>
+struct pseudo_cost_update_t {
+  i_t variable;
+  rounding_direction_t direction;
+  f_t delta;
+  double work_timestamp;
+  int worker_id;
+
+  bool operator<(const pseudo_cost_update_t& other) const
+  {
+    if (work_timestamp != other.work_timestamp) return work_timestamp < other.work_timestamp;
+    if (variable != other.variable) return variable < other.variable;
+    if (delta != other.delta) return delta < other.delta;
+    return worker_id < other.worker_id;
+  }
+};
+
+// `BnBMode` specify how we control the memory accesses:
+// - If `BnBMode == branch_and_bound_mode_t::PARALLEL`, then we assume that this object is shared
+// among the B&B threads, and thus, require atomics and mutexes to avoid data races.
+//  - If `BnBMode == branch_and_bound_mode_t::DETERMINISTIC`, then each thread has it own pseudocost
+// snapshot, hence, we can disable all atomics and mutexes.
+// `BnBMode` is automatically set depending if it is a `pseudo_costs_t` (PARALLEL)
+// or a `pseudo_costs_snapshot_t` (DETERMINISTIC).
+template <typename i_t,
+          typename f_t,
+          branch_and_bound_mode_t BnBMode = branch_and_bound_mode_t::PARALLEL>
 class pseudo_costs_t {
  public:
+  // Define the types used for storing the pseudocost of each variable.
+  // Disable or enable atomics depending on if we are in REGULAR or DETERMINISTIC modes
+  using float_type =
+    std::conditional_t<BnBMode == branch_and_bound_mode_t::PARALLEL, omp_atomic_t<f_t>, f_t>;
+
+  using int_type =
+    std::conditional_t<BnBMode == branch_and_bound_mode_t::PARALLEL, omp_atomic_t<i_t>, i_t>;
+
+  // Counting the number of LP iterations might require more than an int32 can hold.
+  using int64_type = std::
+    conditional_t<BnBMode == branch_and_bound_mode_t::PARALLEL, omp_atomic_t<int64_t>, int64_t>;
+
+  // Disable or enable mutexes depending on if we are in REGULAR or DETERMINISTIC modes
+  using mutex_type =
+    std::conditional_t<BnBMode == branch_and_bound_mode_t::PARALLEL, omp_mutex_t, fake_omp_mutex_t>;
+
   explicit pseudo_costs_t(i_t num_variables)
     : pseudo_cost_sum_down(num_variables),
       pseudo_cost_sum_up(num_variables),
@@ -435,27 +144,13 @@ class pseudo_costs_t {
       pseudo_cost_num_up(num_variables),
       pseudo_cost_mutex_up(num_variables),
       pseudo_cost_mutex_down(num_variables),
-      AT(1, 1, 1)
+      AT(std::make_shared<csc_matrix_t<i_t, f_t>>(1, 1, 1)),
+      pdlp_warm_cache(std::make_shared<batch_pdlp_warm_cache_t<i_t, f_t>>())
   {
   }
 
   void update_pseudo_costs(mip_node_t<i_t, f_t>* node_ptr, f_t leaf_objective);
 
-  pseudo_cost_snapshot_t<i_t, f_t> create_snapshot() const
-  {
-    const i_t n = (i_t)pseudo_cost_sum_down.size();
-    std::vector<f_t> sd(n), su(n);
-    std::vector<i_t> nd(n), nu(n);
-    for (i_t j = 0; j < n; ++j) {
-      sd[j] = pseudo_cost_sum_down[j];
-      su[j] = pseudo_cost_sum_up[j];
-      nd[j] = pseudo_cost_num_down[j];
-      nu[j] = pseudo_cost_num_up[j];
-    }
-    return pseudo_cost_snapshot_t<i_t, f_t>(
-      std::move(sd), std::move(su), std::move(nd), std::move(nu));
-  }
-
   void merge_updates(const std::vector<pseudo_cost_update_t<i_t, f_t>>& updates)
   {
     for (const auto& upd : updates) {
@@ -479,10 +174,7 @@ class pseudo_costs_t {
     pseudo_cost_mutex_down.resize(num_variables);
   }
 
-  void initialized(i_t& num_initialized_down,
-                   i_t& num_initialized_up,
-                   f_t& pseudo_cost_down_avg,
-                   f_t& pseudo_cost_up_avg) const;
+  pseudo_cost_averages_t<i_t, f_t> compute_averages() const;
 
   f_t obj_estimate(const std::vector<i_t>& fractional,
                    const std::vector<f_t>& solution,
@@ -506,6 +198,8 @@ class pseudo_costs_t {
                                   const lp_problem_t<i_t, f_t>& original_lp);
 
   void update_pseudo_costs_from_strong_branching(const std::vector<i_t>& fractional,
+                                                 const std::vector<f_t>& strong_branch_down,
+                                                 const std::vector<f_t>& strong_branch_up,
                                                  const std::vector<f_t>& root_soln);
 
   uint32_t compute_state_hash() const
@@ -514,31 +208,99 @@ class pseudo_costs_t {
            detail::compute_hash(pseudo_cost_num_down) ^ detail::compute_hash(pseudo_cost_num_up);
   }
 
-  uint32_t compute_strong_branch_hash() const
-  {
-    return detail::compute_hash(strong_branch_down) ^ detail::compute_hash(strong_branch_up);
-  }
-
   f_t calculate_pseudocost_score(i_t j,
                                  const std::vector<f_t>& solution,
-                                 f_t pseudo_cost_up_avg,
-                                 f_t pseudo_cost_down_avg) const;
+                                 pseudo_cost_averages_t<i_t, f_t> averages) const;
 
   reliability_branching_settings_t<i_t, f_t> reliability_branching_settings;
 
-  csc_matrix_t<i_t, f_t> AT;  // Transpose of the constraint matrix A
-  std::vector<omp_atomic_t<f_t>> pseudo_cost_sum_up;
-  std::vector<omp_atomic_t<f_t>> pseudo_cost_sum_down;
-  std::vector<omp_atomic_t<i_t>> pseudo_cost_num_up;
-  std::vector<omp_atomic_t<i_t>> pseudo_cost_num_down;
-  std::vector<f_t> strong_branch_down;
-  std::vector<f_t> strong_branch_up;
-  std::vector<omp_mutex_t> pseudo_cost_mutex_up;
-  std::vector<omp_mutex_t> pseudo_cost_mutex_down;
-  omp_atomic_t<i_t> num_strong_branches_completed = 0;
-  omp_atomic_t<int64_t> strong_branching_lp_iter  = 0;
-
-  batch_pdlp_warm_cache_t<i_t, f_t> pdlp_warm_cache;
+  std::shared_ptr<csc_matrix_t<i_t, f_t>> AT;  // Transpose of the constraint matrix A
+  std::vector<float_type> pseudo_cost_sum_up;
+  std::vector<float_type> pseudo_cost_sum_down;
+  std::vector<int_type> pseudo_cost_num_up;
+  std::vector<int_type> pseudo_cost_num_down;
+  std::vector<mutex_type> pseudo_cost_mutex_up;
+  std::vector<mutex_type> pseudo_cost_mutex_down;
+  int64_type strong_branching_lp_iter = 0;
+
+  std::shared_ptr<batch_pdlp_warm_cache_t<i_t, f_t>> pdlp_warm_cache;
+};
+
+template <typename i_t,
+          typename f_t,
+          branch_and_bound_mode_t BnBMode = branch_and_bound_mode_t::DETERMINISTIC>
+class pseudo_cost_snapshot_t : public pseudo_costs_t<i_t, f_t, BnBMode> {
+ public:
+  using Base = pseudo_costs_t<i_t, f_t, BnBMode>;
+
+  pseudo_cost_snapshot_t(i_t num_variables) : Base(num_variables) {};
+
+  pseudo_cost_snapshot_t(const pseudo_costs_t<i_t, f_t, branch_and_bound_mode_t::PARALLEL>& other)
+    : Base(1)
+  {
+    *this = other;
+  }
+
+  pseudo_cost_snapshot_t(const Base& other) : Base(1) { *this = other; }
+  pseudo_cost_snapshot_t& operator=(
+    const pseudo_costs_t<i_t, f_t, branch_and_bound_mode_t::PARALLEL>& other)
+  {
+    Base::AT              = other.AT;
+    Base::pdlp_warm_cache = other.pdlp_warm_cache;
+
+    i_t n = other.pseudo_cost_num_down.size();
+    Base::pseudo_cost_num_down.resize(n);
+    Base::pseudo_cost_num_up.resize(n);
+    Base::pseudo_cost_sum_down.resize(n);
+    Base::pseudo_cost_sum_up.resize(n);
+
+    for (i_t i = 0; i < n; ++i) {
+      Base::pseudo_cost_num_down[i] = other.pseudo_cost_num_down[i].get_no_atomic();
+      Base::pseudo_cost_num_up[i]   = other.pseudo_cost_num_up[i].get_no_atomic();
+      Base::pseudo_cost_sum_down[i] = other.pseudo_cost_sum_down[i].get_no_atomic();
+      Base::pseudo_cost_sum_up[i]   = other.pseudo_cost_sum_up[i].get_no_atomic();
+    }
+
+    return *this;
+  }
+
+  pseudo_cost_snapshot_t& operator=(const Base& other)
+  {
+    if (this != &other) {
+      Base::AT                   = other.AT;
+      Base::pdlp_warm_cache      = other.pdlp_warm_cache;
+      Base::pseudo_cost_num_down = other.pseudo_cost_num_down;
+      Base::pseudo_cost_num_up   = other.pseudo_cost_num_up;
+      Base::pseudo_cost_sum_down = other.pseudo_cost_sum_down;
+      Base::pseudo_cost_sum_up   = other.pseudo_cost_sum_up;
+    }
+    return *this;
+  };
+
+  void queue_update(
+    i_t variable, rounding_direction_t direction, f_t delta, double clock, int worker_id)
+  {
+    updates_.push_back({variable, direction, delta, clock, worker_id});
+    if (direction == rounding_direction_t::DOWN) {
+      Base::pseudo_cost_sum_down[variable] += delta;
+      ++Base::pseudo_cost_num_down[variable];
+    } else {
+      Base::pseudo_cost_sum_up[variable] += delta;
+      ++Base::pseudo_cost_num_up[variable];
+    }
+  }
+
+  std::vector<pseudo_cost_update_t<i_t, f_t>> take_updates()
+  {
+    std::vector<pseudo_cost_update_t<i_t, f_t>> result;
+    result.swap(updates_);
+    return result;
+  }
+
+  i_t n_vars() const { return Base::pseudo_cost_sum_down.size(); }
+
+ private:
+  std::vector<pseudo_cost_update_t<i_t, f_t>> updates_;
 };
 
 template <typename i_t, typename f_t>
diff --git a/cpp/src/branch_and_bound/branch_and_bound_worker.hpp b/cpp/src/branch_and_bound/worker.hpp
similarity index 52%
rename from cpp/src/branch_and_bound/branch_and_bound_worker.hpp
rename to cpp/src/branch_and_bound/worker.hpp
index 4de2b43cae..87689e57bb 100644
--- a/cpp/src/branch_and_bound/branch_and_bound_worker.hpp
+++ b/cpp/src/branch_and_bound/worker.hpp
@@ -7,36 +7,19 @@
 
 #pragma once
 
+#include <branch_and_bound/constants.hpp>
 #include <branch_and_bound/mip_node.hpp>
 
 #include <dual_simplex/basis_updates.hpp>
 #include <dual_simplex/bounds_strengthening.hpp>
-#include <dual_simplex/phase2.hpp>
 
 #include <utilities/pcgenerator.hpp>
 
-#include <array>
 #include <deque>
-#include <mutex>
 #include <vector>
 
 namespace cuopt::linear_programming::dual_simplex {
 
-constexpr int num_search_strategies = 5;
-
-// Indicate the search and variable selection algorithms used by each thread
-// in B&B (See [1]).
-//
-// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin,
-// Berlin, 2007. doi: 10.14279/depositonce-1634.
-enum search_strategy_t : int {
-  BEST_FIRST         = 0,  // Best-First + Plunging.
-  PSEUDOCOST_DIVING  = 1,  // Pseudocost diving (9.2.5)
-  LINE_SEARCH_DIVING = 2,  // Line search diving (9.2.4)
-  GUIDED_DIVING      = 3,  // Guided diving (9.2.3).
-  COEFFICIENT_DIVING = 4   // Coefficient diving (9.2.1)
-};
-
 template <typename i_t, typename f_t>
 struct branch_and_bound_stats_t {
   f_t start_time                         = 0.0;
@@ -116,9 +99,8 @@ class branch_and_bound_worker_t {
                    const lp_problem_t<i_t, f_t>& original_lp,
                    const simplex_solver_settings_t<i_t, f_t>& settings)
   {
-    internal_node = node->detach_copy();
-    start_node    = &internal_node;
-
+    internal_node   = node->detach_copy();
+    start_node      = &internal_node;
     start_lower     = original_lp.lower;
     start_upper     = original_lp.upper;
     search_strategy = type;
@@ -130,7 +112,7 @@ class branch_and_bound_worker_t {
     return node_presolver.bounds_strengthening(settings, bounds_changed, start_lower, start_upper);
   }
 
-  // Set the variables bounds for the LP relaxation of the current node.
+  // Set the variables bounds for the LP relaxation in the current node.
   bool set_lp_variable_bounds(mip_node_t<i_t, f_t>* node_ptr,
                               const simplex_solver_settings_t<i_t, f_t>& settings)
   {
@@ -162,120 +144,4 @@ class branch_and_bound_worker_t {
   mip_node_t<i_t, f_t> internal_node;
 };
 
-template <typename i_t, typename f_t>
-class branch_and_bound_worker_pool_t {
- public:
-  void init(i_t num_workers,
-            const lp_problem_t<i_t, f_t>& original_lp,
-            const csr_matrix_t<i_t, f_t>& Arow,
-            const std::vector<variable_type_t>& var_type,
-            const simplex_solver_settings_t<i_t, f_t>& settings)
-  {
-    workers_.resize(num_workers);
-    num_idle_workers_ = num_workers;
-    for (i_t i = 0; i < num_workers; ++i) {
-      workers_[i] = std::make_unique<branch_and_bound_worker_t<i_t, f_t>>(
-        i, original_lp, Arow, var_type, settings);
-      idle_workers_.push_front(i);
-    }
-
-    is_initialized = true;
-  }
-
-  // Here, we are assuming that the scheduler is the only
-  // thread that can retrieve/pop an idle worker.
-  branch_and_bound_worker_t<i_t, f_t>* get_idle_worker()
-  {
-    std::lock_guard<omp_mutex_t> lock(mutex_);
-    if (idle_workers_.empty()) {
-      return nullptr;
-    } else {
-      i_t idx = idle_workers_.front();
-      return workers_[idx].get();
-    }
-  }
-
-  // Here, we are assuming that the scheduler is the only
-  // thread that can retrieve/pop an idle worker.
-  void pop_idle_worker()
-  {
-    std::lock_guard<omp_mutex_t> lock(mutex_);
-    if (!idle_workers_.empty()) {
-      idle_workers_.pop_front();
-      num_idle_workers_--;
-    }
-  }
-
-  void return_worker_to_pool(branch_and_bound_worker_t<i_t, f_t>* worker)
-  {
-    worker->is_active = false;
-    std::lock_guard<omp_mutex_t> lock(mutex_);
-    idle_workers_.push_back(worker->worker_id);
-    num_idle_workers_++;
-  }
-
-  f_t get_lower_bound()
-  {
-    f_t lower_bound = std::numeric_limits<f_t>::infinity();
-
-    if (is_initialized) {
-      for (i_t i = 0; i < workers_.size(); ++i) {
-        if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) {
-          lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound);
-        }
-      }
-    }
-
-    return lower_bound;
-  }
-
-  i_t num_idle_workers() { return num_idle_workers_; }
-
- private:
-  // Worker pool
-  std::vector<std::unique_ptr<branch_and_bound_worker_t<i_t, f_t>>> workers_;
-  bool is_initialized = false;
-
-  omp_mutex_t mutex_;
-  std::deque<i_t> idle_workers_;
-  omp_atomic_t<i_t> num_idle_workers_;
-};
-
-template <typename f_t, typename i_t>
-std::vector<search_strategy_t> get_search_strategies(
-  diving_heuristics_settings_t<i_t, f_t> settings)
-{
-  std::vector<search_strategy_t> types;
-  types.reserve(num_search_strategies);
-  types.push_back(BEST_FIRST);
-  if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); }
-  if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); }
-  if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); }
-  if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); }
-  return types;
-}
-
-template <typename i_t>
-std::array<i_t, num_search_strategies> get_max_workers(
-  i_t num_workers, const std::vector<search_strategy_t>& strategies)
-{
-  std::array<i_t, num_search_strategies> max_num_workers;
-  max_num_workers.fill(0);
-
-  i_t bfs_workers             = std::max(strategies.size() == 1 ? num_workers : num_workers / 4, 1);
-  max_num_workers[BEST_FIRST] = bfs_workers;
-
-  i_t diving_workers = (num_workers - bfs_workers);
-  i_t m              = strategies.size() - 1;
-
-  for (size_t i = 1, k = 0; i < strategies.size(); ++i) {
-    i_t start                      = (double)k * diving_workers / m;
-    i_t end                        = (double)(k + 1) * diving_workers / m;
-    max_num_workers[strategies[i]] = end - start;
-    ++k;
-  }
-
-  return max_num_workers;
-}
-
 }  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/branch_and_bound/worker_pool.hpp b/cpp/src/branch_and_bound/worker_pool.hpp
new file mode 100644
index 0000000000..2b52b6e7bf
--- /dev/null
+++ b/cpp/src/branch_and_bound/worker_pool.hpp
@@ -0,0 +1,130 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <branch_and_bound/worker.hpp>
+
+namespace cuopt::linear_programming::dual_simplex {
+
+template <typename i_t, typename f_t>
+class branch_and_bound_worker_pool_t {
+ public:
+  void init(i_t num_workers,
+            const lp_problem_t<i_t, f_t>& original_lp,
+            const csr_matrix_t<i_t, f_t>& Arow,
+            const std::vector<variable_type_t>& var_type,
+            const simplex_solver_settings_t<i_t, f_t>& settings)
+  {
+    workers_.resize(num_workers);
+    num_idle_workers_ = num_workers;
+    for (i_t i = 0; i < num_workers; ++i) {
+      workers_[i] = std::make_unique<branch_and_bound_worker_t<i_t, f_t>>(
+        i, original_lp, Arow, var_type, settings);
+      idle_workers_.push_front(i);
+    }
+
+    is_initialized = true;
+  }
+
+  // Here, we are assuming that the scheduler is the only
+  // thread that can retrieve/pop an idle worker.
+  branch_and_bound_worker_t<i_t, f_t>* get_idle_worker()
+  {
+    std::lock_guard<omp_mutex_t> lock(mutex_);
+    if (idle_workers_.empty()) {
+      return nullptr;
+    } else {
+      i_t idx = idle_workers_.front();
+      return workers_[idx].get();
+    }
+  }
+
+  // Here, we are assuming that the scheduler is the only
+  // thread that can retrieve/pop an idle worker.
+  void pop_idle_worker()
+  {
+    std::lock_guard<omp_mutex_t> lock(mutex_);
+    if (!idle_workers_.empty()) {
+      idle_workers_.pop_front();
+      num_idle_workers_--;
+    }
+  }
+
+  void return_worker_to_pool(branch_and_bound_worker_t<i_t, f_t>* worker)
+  {
+    worker->is_active = false;
+    std::lock_guard<omp_mutex_t> lock(mutex_);
+    idle_workers_.push_back(worker->worker_id);
+    num_idle_workers_++;
+  }
+
+  f_t get_lower_bound()
+  {
+    f_t lower_bound = std::numeric_limits<f_t>::infinity();
+
+    if (is_initialized) {
+      for (i_t i = 0; i < workers_.size(); ++i) {
+        if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) {
+          lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound);
+        }
+      }
+    }
+
+    return lower_bound;
+  }
+
+  i_t num_idle_workers() { return num_idle_workers_; }
+
+ private:
+  // Worker pool
+  std::vector<std::unique_ptr<branch_and_bound_worker_t<i_t, f_t>>> workers_;
+  bool is_initialized = false;
+
+  omp_mutex_t mutex_;
+  std::deque<i_t> idle_workers_;
+  omp_atomic_t<i_t> num_idle_workers_;
+};
+
+template <typename f_t, typename i_t>
+std::vector<search_strategy_t> get_search_strategies(
+  diving_heuristics_settings_t<i_t, f_t> settings)
+{
+  std::vector<search_strategy_t> types;
+  types.reserve(num_search_strategies);
+  types.push_back(BEST_FIRST);
+  if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); }
+  if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); }
+  if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); }
+  if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); }
+  return types;
+}
+
+template <typename i_t>
+std::array<i_t, num_search_strategies> get_max_workers(
+  i_t num_workers, const std::vector<search_strategy_t>& strategies)
+{
+  std::array<i_t, num_search_strategies> max_num_workers;
+  max_num_workers.fill(0);
+
+  i_t bfs_workers             = std::max(strategies.size() == 1 ? num_workers : num_workers / 4, 1);
+  max_num_workers[BEST_FIRST] = bfs_workers;
+
+  i_t diving_workers = (num_workers - bfs_workers);
+  i_t m              = strategies.size() - 1;
+
+  for (size_t i = 1, k = 0; i < strategies.size(); ++i) {
+    i_t start                      = (double)k * diving_workers / m;
+    i_t end                        = (double)(k + 1) * diving_workers / m;
+    max_num_workers[strategies[i]] = end - start;
+    ++k;
+  }
+
+  return max_num_workers;
+}
+
+}  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp
index f6e66472dd..8890a7487a 100644
--- a/cpp/src/utilities/omp_helpers.hpp
+++ b/cpp/src/utilities/omp_helpers.hpp
@@ -54,6 +54,15 @@ class omp_mutex_t {
   std::unique_ptr<omp_lock_t> mutex;
 };
 
+// Empty class with the same methods as `omp_mutex_t`. This is mainly used for cleanly disabling
+// the `omp_mutex_t` via type alias (`lock` and `unlock` are replaced by NOOPs).
+class fake_omp_mutex_t {
+ public:
+  static void lock() {}
+  static void unlock() {}
+  static bool try_lock() { return true; }
+};
+
 // Wrapper for omp atomic operations. See
 // https://www.openmp.org/spec-html/5.1/openmpsu105.html.
 template <typename T>
@@ -117,6 +126,11 @@ class omp_atomic_t {
 
   T fetch_sub(T inc) { return fetch_add(-inc); }
 
+  // Get the underlying value without atomics
+  T& get_no_atomic() { return val; }
+
+  T get_no_atomic() const { return val; }
+
  private:
   T val;