From 96179e7539065cd62d1436cb6ece008a0049f958 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Wed, 11 Jun 2025 13:43:06 -0700
Subject: [PATCH 1/2] Fix accidental O(N^2) BFRT with O(N log N). 13% better on
 NETLIB

---
 cpp/src/dual_simplex/CMakeLists.txt           |   1 +
 .../bound_flipping_ratio_test.cpp             | 259 ++++++++++++++++++
 .../bound_flipping_ratio_test.hpp             |  90 ++++++
 cpp/src/dual_simplex/phase2.cpp               |  45 ++-
 4 files changed, 393 insertions(+), 2 deletions(-)
 create mode 100644 cpp/src/dual_simplex/bound_flipping_ratio_test.cpp
 create mode 100644 cpp/src/dual_simplex/bound_flipping_ratio_test.hpp
diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt
index 16ee502f83..baa5b7213b 100644
--- a/cpp/src/dual_simplex/CMakeLists.txt
+++ b/cpp/src/dual_simplex/CMakeLists.txt
@@ -16,6 +16,7 @@
 set(DUAL_SIMPLEX_SRC_FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/basis_solves.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/basis_updates.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/bound_flipping_ratio_test.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/crossover.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/initial_basis.cpp
diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp
new file mode 100644
index 0000000000..5cfa35c748
--- /dev/null
+++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp
@@ -0,0 +1,259 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <dual_simplex/bound_flipping_ratio_test.hpp>
+
+#include <dual_simplex/tic_toc.hpp>
+
+#include <algorithm>
+
+namespace cuopt::linear_programming::dual_simplex {
+
+template <typename i_t, typename f_t>
+i_t bound_flipping_ratio_test_t<i_t, f_t>::compute_breakpoints(std::vector<i_t>& indicies,
+                                                               std::vector<f_t>& ratios)
+{
+  i_t n                  = n_;
+  i_t m                  = m_;
+  constexpr bool verbose = false;
+  f_t pivot_tol          = settings_.pivot_tol;
+  const f_t dual_tol     = settings_.dual_tol / 10;
+
+  i_t idx = 0;
+  while (idx == 0 && pivot_tol > 1e-12) {
+    for (i_t k = 0; k < n - m; ++k) {
+      const i_t j = nonbasic_list_[k];
+      if (vstatus_[j] == variable_status_t::NONBASIC_FIXED) { continue; }
+      if (vstatus_[j] == variable_status_t::NONBASIC_LOWER && delta_z_[j] < -pivot_tol) {
+        indicies[idx] = k;
+        ratios[idx]   = std::max((-dual_tol - z_[j]) / delta_z_[j], 0.0);
+        if constexpr (verbose) { settings_.log.printf("ratios[%d] = %e\n", idx, ratios[idx]); }
+        idx++;
+      }
+      if (vstatus_[j] == variable_status_t::NONBASIC_UPPER && delta_z_[j] > pivot_tol) {
+        indicies[idx] = k;
+        ratios[idx]   = std::max((dual_tol - z_[j]) / delta_z_[j], 0.0);
+        if constexpr (verbose) { settings_.log.printf("ratios[%d] = %e\n", idx, ratios[idx]); }
+        idx++;
+      }
+    }
+    pivot_tol /= 10;
+  }
+  return idx;
+}
+
+template <typename i_t, typename f_t>
+i_t bound_flipping_ratio_test_t<i_t, f_t>::single_pass(i_t start,
+                                                       i_t end,
+                                                       const std::vector<i_t>& indicies,
+                                                       const std::vector<f_t>& ratios,
+                                                       f_t& slope,
+                                                       f_t& step_length,
+                                                       i_t& nonbasic_entering,
+                                                       i_t& entering_index)
+{
+  // Find the minimum ratio
+  f_t min_val    = inf;
+  entering_index = -1;
+  i_t candidate  = -1;
+  f_t zero_tol   = settings_.zero_tol;
+  i_t k_idx      = -1;
+  for (i_t k = start; k < end; ++k) {
+    if (ratios[k] < min_val) {
+      min_val   = ratios[k];
+      candidate = indicies[k];
+      k_idx     = k;
+    } else if (ratios[k] < min_val + zero_tol) {
+      // Use Harris to select variables with larger pivots
+      const i_t j = nonbasic_list_[indicies[k]];
+      if (std::abs(delta_z_[j]) > std::abs(delta_z_[candidate])) {
+        min_val   = ratios[k];
+        candidate = indicies[k];
+        k_idx     = k;
+      }
+    }
+  }
+  step_length       = min_val;
+  nonbasic_entering = candidate;
+  const i_t j = entering_index = nonbasic_list_[nonbasic_entering];
+
+  constexpr bool verbose = false;
+  if (lower_[j] > -inf && upper_[j] < inf && lower_[j] != upper_[j]) {
+    const f_t interval    = upper_[j] - lower_[j];
+    const f_t delta_slope = std::abs(delta_z_[j]) * interval;
+    if constexpr (verbose) {
+      settings_.log.printf("single pass delta slope %e slope %e after slope %e step length %e\n",
+                           delta_slope,
+                           slope,
+                           slope - delta_slope,
+                           step_length);
+    }
+    slope -= delta_slope;
+    return k_idx;  // we should see if we can continue to increase the step-length
+  }
+  return -1;  // we are done. do not increase the step-length further
+}
+
+template <typename i_t, typename f_t>
+i_t bound_flipping_ratio_test_t<i_t, f_t>::compute_step_length(f_t& step_length,
+                                                               i_t& nonbasic_entering)
+{
+  i_t m                  = m_;
+  i_t n                  = n_;
+  constexpr bool verbose = false;
+
+  // Compute the initial set of breakpoints
+  std::vector<i_t> indicies(n - m);
+  std::vector<f_t> ratios(n - m);
+  i_t num_breakpoints = compute_breakpoints(indicies, ratios);
+  if constexpr (verbose) { settings_.log.printf("Initial breakpoints %d\n", num_breakpoints); }
+  if (num_breakpoints == 0) {
+    nonbasic_entering = -1;
+    return -1;
+  }
+
+  f_t slope          = slope_;
+  nonbasic_entering  = -1;
+  i_t entering_index = -1;
+
+  i_t k_idx = single_pass(
+    0, num_breakpoints, indicies, ratios, slope, step_length, nonbasic_entering, entering_index);
+  bool continue_search = k_idx >= 0 && num_breakpoints > 1 && slope > 0.0;
+  if (!continue_search) {
+    if constexpr (verbose) {
+      settings_.log.printf(
+        "BFRT stopping. No bound flips. Step length %e Nonbasic entering %d Entering %d.\n",
+        step_length,
+        nonbasic_entering,
+        entering_index);
+    }
+    return entering_index;
+  }
+
+  if constexpr (verbose) {
+    settings_.log.printf(
+      "Continuing past initial step length %e entering index %d nonbasic entering %d slope %e\n",
+      step_length,
+      entering_index,
+      nonbasic_entering,
+      slope);
+  }
+
+  // Continue the search using a heap to order the breakpoints
+  ratios[k_idx]   = ratios[num_breakpoints - 1];
+  indicies[k_idx] = indicies[num_breakpoints - 1];
+
+  heap_passes(
+    indicies, ratios, num_breakpoints - 1, slope, step_length, nonbasic_entering, entering_index);
+
+  if constexpr (verbose) {
+    settings_.log.printf("BFRT step length %e entering index %d non basic entering %d\n",
+                         step_length,
+                         entering_index,
+                         nonbasic_entering);
+  }
+  return entering_index;
+}
+
+template <typename i_t, typename f_t>
+void bound_flipping_ratio_test_t<i_t, f_t>::heap_passes(const std::vector<i_t>& current_indicies,
+                                                        const std::vector<f_t>& current_ratios,
+                                                        i_t num_breakpoints,
+                                                        f_t& slope,
+                                                        f_t& step_length,
+                                                        i_t& nonbasic_entering,
+                                                        i_t& entering_index)
+{
+  std::vector<i_t> bare_idx(num_breakpoints);
+  constexpr bool verbose                = false;
+  const f_t dual_tol                    = settings_.dual_tol;
+  const f_t zero_tol                    = settings_.zero_tol;
+  const std::vector<f_t>& delta_z       = delta_z_;
+  const std::vector<i_t>& nonbasic_list = nonbasic_list_;
+  const i_t N                           = num_breakpoints;
+  for (i_t k = 0; k < N; ++k) {
+    bare_idx[k] = k;
+    if constexpr (verbose) {
+      settings_.log.printf("Adding index %d ratio %e pivot %e to heap\n",
+                           current_indicies[k],
+                           current_ratios[k],
+                           std::abs(delta_z[nonbasic_list[current_indicies[k]]]));
+    }
+  }
+
+  auto compare = [zero_tol, &current_ratios, &current_indicies, &delta_z, &nonbasic_list](
+                   const i_t& a, const i_t& b) {
+    return (current_ratios[a] > current_ratios[b]) ||
+           (std::abs(current_ratios[a] - current_ratios[b]) < zero_tol &&
+            std::abs(delta_z[nonbasic_list[current_indicies[a]]]) >
+              std::abs(delta_z[nonbasic_list[current_indicies[b]]]));
+  };
+
+  std::make_heap(bare_idx.begin(), bare_idx.end(), compare);
+
+  while (bare_idx.size() > 0 && slope > 0) {
+    // Remove minimum ratio from the heap and rebalance
+    i_t heap_index = bare_idx.front();
+    std::pop_heap(bare_idx.begin(), bare_idx.end(), compare);
+    bare_idx.pop_back();
+
+    nonbasic_entering = current_indicies[heap_index];
+    const i_t j = entering_index = nonbasic_list_[nonbasic_entering];
+    step_length                  = current_ratios[heap_index];
+
+    if (lower_[j] > -inf && upper_[j] < inf && lower_[j] != upper_[j]) {
+      // We have a bounded variable
+      const f_t interval    = upper_[j] - lower_[j];
+      const f_t delta_slope = std::abs(delta_z_[j]) * interval;
+      const f_t pivot       = std::abs(delta_z[j]);
+      if constexpr (verbose) {
+        settings_.log.printf(
+          "heap %d step-length %.12e pivot %e nonbasic entering %d slope %e delta_slope %e new "
+          "slope %e\n",
+          bare_idx.size(),
+          current_ratios[heap_index],
+          pivot,
+          nonbasic_entering,
+          slope,
+          delta_slope,
+          slope - delta_slope);
+      }
+      slope -= delta_slope;
+    } else {
+      // The variable is not bounded. Stop the search.
+      break;
+    }
+
+    if (toc(start_time_) > settings_.time_limit) {
+      entering_index = -2;
+      return;
+    }
+    if (settings_.concurrent_halt != nullptr &&
+        settings_.concurrent_halt->load(std::memory_order_acquire) == 1) {
+      entering_index = -3;
+      return;
+    }
+  }
+}
+
+#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
+
+template class bound_flipping_ratio_test_t<int, double>;
+
+#endif
+
+}  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp
new file mode 100644
index 0000000000..c949302562
--- /dev/null
+++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp
@@ -0,0 +1,90 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <dual_simplex/initial_basis.hpp>
+#include <dual_simplex/simplex_solver_settings.hpp>
+
+#include <vector>
+
+namespace cuopt::linear_programming::dual_simplex {
+
+template <typename i_t, typename f_t>
+class bound_flipping_ratio_test_t {
+ public:
+  bound_flipping_ratio_test_t(const simplex_solver_settings_t<i_t, f_t>& settings,
+                              f_t start_time,
+                              i_t m,
+                              i_t n,
+                              f_t initial_slope,
+                              const std::vector<f_t>& lower,
+                              const std::vector<f_t>& upper,
+                              const std::vector<variable_status_t>& vstatus,
+                              const std::vector<i_t>& nonbasic_list,
+                              std::vector<f_t>& z,
+                              std::vector<f_t>& delta_z)
+    : settings_(settings),
+      start_time_(start_time),
+      m_(m),
+      n_(n),
+      slope_(initial_slope),
+      lower_(lower),
+      upper_(upper),
+      vstatus_(vstatus),
+      nonbasic_list_(nonbasic_list),
+      z_(z),
+      delta_z_(delta_z)
+  {
+  }
+
+  i_t compute_step_length(f_t& step_length, i_t& nonbasic_entering);
+
+ private:
+  i_t compute_breakpoints(std::vector<i_t>& indices, std::vector<f_t>& ratios);
+  i_t single_pass(i_t start,
+                  i_t end,
+                  const std::vector<i_t>& indices,
+                  const std::vector<f_t>& ratios,
+                  f_t& slope,
+                  f_t& step_length,
+                  i_t& nonbasic_entering,
+                  i_t& enetering_index);
+  void heap_passes(const std::vector<i_t>& current_indicies,
+                   const std::vector<f_t>& current_ratios,
+                   i_t num_breakpoints,
+                   f_t& slope,
+                   f_t& step_lenght,
+                   i_t& nonbasic_entering,
+                   i_t& entering_index);
+
+  const std::vector<f_t>& lower_;
+  const std::vector<f_t>& upper_;
+  const std::vector<i_t>& nonbasic_list_;
+  const std::vector<variable_status_t>& vstatus_;
+  const std::vector<f_t>& z_;
+  const std::vector<f_t>& delta_z_;
+
+  const simplex_solver_settings_t<i_t, f_t>& settings_;
+
+  f_t start_time_;
+  f_t slope_;
+
+  i_t n_;
+  i_t m_;
+};
+
+}  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index cbfc66a222..7b12f8d10a 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -17,6 +17,7 @@
 
 #include <dual_simplex/basis_solves.hpp>
 #include <dual_simplex/basis_updates.hpp>
+#include <dual_simplex/bound_flipping_ratio_test.hpp>
 #include <dual_simplex/initial_basis.hpp>
 #include <dual_simplex/phase1.hpp>
 #include <dual_simplex/phase2.hpp>
@@ -428,7 +429,7 @@ i_t bound_flipping_ratio_test(const lp_problem_t<i_t, f_t>& lp,
       const f_t delta_slope = std::abs(delta_z[j]) * interval;
 #ifdef BOUND_FLIP_DEBUG
       if (slope - delta_slope > 0) {
-        log.printf(
+        settings.log.printf(
           "Bound flip %d slope change %e prev slope %e slope %e. curr step "
           "length %e\n",
           j,
@@ -1234,6 +1235,7 @@ dual::status_t dual_phase2(i_t phase,
   std::vector<f_t> delta_z(n);
   std::vector<f_t> delta_x(n);
   const i_t start_iter = iter;
+  f_t bfrt_time        = 0;
   while (iter < iter_limit) {
     // Pricing
     i_t direction;
@@ -1356,6 +1358,41 @@ dual::status_t dual_phase2(i_t phase,
                                                    step_length,
                                                    nonbasic_entering_index);
     } else if (bound_flip_ratio) {
+      f_t bfrt_start = tic();
+#if 0
+      f_t slope = direction == 1 ? (lp.lower[leaving_index] - x[leaving_index])
+                             : (x[leaving_index] - lp.upper[leaving_index]);
+      bound_flipping_ratio_test_t<i_t, f_t> bfrt(settings, start_time, m, n, slope, lp.lower, lp.upper, vstatus, nonbasic_list, z, delta_z);
+      entering_index = bfrt.compute_step_length(step_length, nonbasic_entering_index);
+      if constexpr (0)
+      {
+        f_t shadow_step_length;
+        i_t shadow_nonbasic_entering_index;
+        i_t shadow_entering_index = phase2::bound_flipping_ratio_test(lp,
+                                                         settings,
+                                                         start_time,
+                                                         vstatus,
+                                                         nonbasic_list,
+                                                         x,
+                                                         z,
+                                                         delta_z,
+                                                         direction,
+                                                         leaving_index,
+                                                         shadow_step_length,
+                                                         shadow_nonbasic_entering_index);
+        if (shadow_nonbasic_entering_index != nonbasic_entering_index)
+        {
+          settings.log.printf(
+            "step diff %e shadow step length %e step length %e shadow nonbasic entering %d "
+            "nonbasic entering %d\n",
+            step_length - shadow_step_length,
+            shadow_step_length,
+            step_length,
+            shadow_nonbasic_entering_index,
+            nonbasic_entering_index);
+        }
+      }
+#else
       entering_index = phase2::bound_flipping_ratio_test(lp,
                                                          settings,
                                                          start_time,
@@ -1368,6 +1405,8 @@ dual::status_t dual_phase2(i_t phase,
                                                          leaving_index,
                                                          step_length,
                                                          nonbasic_entering_index);
+#endif
+      bfrt_time += toc(bfrt_start);
     } else {
       entering_index = phase2::phase2_ratio_test(
         lp, settings, vstatus, nonbasic_list, z, delta_z, step_length, nonbasic_entering_index);
@@ -1603,11 +1642,12 @@ dual::status_t dual_phase2(i_t phase,
       if (phase == 1 && iter == 1) {
         settings.log.printf(" Iter     Objective   Primal Infeas  Perturb  Time\n");
       }
-      settings.log.printf("%5d %+.8e %.8e %.2e %.2f\n",
+      settings.log.printf("%5d %+.16e %.8e %.2e %.2e %.2f\n",
                           iter,
                           compute_user_objective(lp, obj),
                           primal_infeasibility,
                           sum_perturb,
+                          step_length,
                           now);
     }
 
@@ -1624,6 +1664,7 @@ dual::status_t dual_phase2(i_t phase,
     }
   }
   if (iter >= iter_limit) { status = dual::status_t::ITERATION_LIMIT; }
+  settings.log.printf("BFRT time %e\n", bfrt_time);
   return status;
 }
 

From e5ae74524bdf73db9616bf56401d12435bdb62c3 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Thu, 12 Jun 2025 13:22:51 -0700
Subject: [PATCH 2/2] Don't keep looking for small pivots. Can turn infeasible
 into numerical error

---
 cpp/src/dual_simplex/bound_flipping_ratio_test.cpp | 5 +----
 cpp/src/dual_simplex/phase2.cpp                    | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp
index 5cfa35c748..96f266ab25 100644
--- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp
+++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp
@@ -34,7 +34,6 @@ i_t bound_flipping_ratio_test_t<i_t, f_t>::compute_breakpoints(std::vector<i_t>&
   const f_t dual_tol     = settings_.dual_tol / 10;
 
   i_t idx = 0;
-  while (idx == 0 && pivot_tol > 1e-12) {
     for (i_t k = 0; k < n - m; ++k) {
       const i_t j = nonbasic_list_[k];
       if (vstatus_[j] == variable_status_t::NONBASIC_FIXED) { continue; }
@@ -51,8 +50,6 @@ i_t bound_flipping_ratio_test_t<i_t, f_t>::compute_breakpoints(std::vector<i_t>&
         idx++;
       }
     }
-    pivot_tol /= 10;
-  }
   return idx;
 }
 
@@ -198,7 +195,7 @@ void bound_flipping_ratio_test_t<i_t, f_t>::heap_passes(const std::vector<i_t>&
   auto compare = [zero_tol, &current_ratios, &current_indicies, &delta_z, &nonbasic_list](
                    const i_t& a, const i_t& b) {
     return (current_ratios[a] > current_ratios[b]) ||
-           (std::abs(current_ratios[a] - current_ratios[b]) < zero_tol &&
+           (current_ratios[b] - current_ratios[a] < zero_tol &&
             std::abs(delta_z[nonbasic_list[current_indicies[a]]]) >
               std::abs(delta_z[nonbasic_list[current_indicies[b]]]));
   };
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index 7b12f8d10a..e48a376590 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -1359,7 +1359,7 @@ dual::status_t dual_phase2(i_t phase,
                                                    nonbasic_entering_index);
     } else if (bound_flip_ratio) {
       f_t bfrt_start = tic();
-#if 0
+#if 1
       f_t slope = direction == 1 ? (lp.lower[leaving_index] - x[leaving_index])
                              : (x[leaving_index] - lp.upper[leaving_index]);
       bound_flipping_ratio_test_t<i_t, f_t> bfrt(settings, start_time, m, n, slope, lp.lower, lp.upper, vstatus, nonbasic_list, z, delta_z);