From 1ba5870870374969b14957c025d8de37265efe94 Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Fri, 6 Feb 2026 15:23:15 +0100
Subject: [PATCH 01/13] Add initial GrowLocalSSP scheduler skeleton (GrowLocalMaxBsp.hpp)

---
 .../GreedySchedulers/GrowLocalMaxBsp.hpp      | 217 ++++++++++++++++++
 1 file changed, 217 insertions(+)
 create mode 100644 include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
new file mode 100644
index 00000000..fae4df84
--- /dev/null
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -0,0 +1,217 @@
+/*
+Copyright 2026 Huawei Technologies Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+@author Toni Boehnlein, Christos Matzoros, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
+*/
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <vector>
+
+#include "osp/bsp/scheduler/MaxBspScheduler.hpp"
+
+namespace osp {
+
+template <typename VertT, typename WeightT>
+struct GrowLocalSSPParams {
+    VertT minSuperstepSize_ = 20;
+    WeightT syncCostMultiplierMinSuperstepWeight_ = 1;
+    WeightT syncCostMultiplierParallelCheck_ = 4;
+};
+
+template <typename GraphT>
+class GrowLocalSSP : public MaxBspScheduler<GraphT> {
+    static_assert(isDirectedGraphV<GraphT>);
+    static_assert(hasVertexWeightsV<GraphT>);
+    static_assert(hasVerticesInTopOrderV<GraphT>);
+    static_assert(hasChildrenInVertexOrderV<GraphT>);
+
+  private:
+    using VertexType = VertexIdxT<GraphT>;
+
+    constexpr std::size_t staleness{2U};
+    GrowLocalSSPParams<VertexIdxT<GraphT>, VWorkwT<GraphT>> params_;
+
+  public:
+    ReturnStatus ComputeSchedule(MaxBspSchedule<GraphT> &schedule);
+
+    std::string GetScheduleName() const override { return "GrowLocalSSP"; }
+};
+
+template <typename GraphT>
+ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &schedule) {
+    const BspInstance<GraphT> &instance = schedule.GetInstance();
+    const GraphT &graph = instance.GetComputationalDag();
+    const VertexType numVertices = graph.NumVertices();
+    const unsigned numProcs = instance.NumberOfProcessors();
+
+    std::set<VertexType> currentlyReady;
+
+    std::array<std::vector<VertexType>, staleness> futureReady;
+    std::vector<VertexType> bestFutureReady;
+
+    std::array<std::vector<std::set<VertexType>>, staleness> procReady(numProcs);
+    std::vector<std::set<VertexType>> bestProcReady(numProcs);
+
+    std::vector<VertexType> predec(numVertices);
+    for (const auto vert : graph.Vertices()) {
+        predec[vert] = graph.InDegree(vert);
+        if (predec[vert] == 0U) {
+            currentlyReady.insert(currentlyReady.end(), vert);
+        }
+    }
+
+    std::vector<std::vector<VertexType>> newAssignments(numProcs);
+    std::vector<std::vector<VertexType>> bestNewAssignments(numProcs);
+
+    // const VWorkwT<GraphT> minWeightParallelCheck = params_.syncCostMultiplierParallelCheck_ * instance.SynchronisationCosts();
+    // const VWorkwT<GraphT> minSuperstepWeight = params_.syncCostMultiplierMinSuperstepWeight_ * instance.SynchronisationCosts();
+    // double desiredParallelism = static_cast<double>(numProcs);
+
+    VertexType totalAssigned = 0;
+    unsigned superStep = 0U;
+
+    while (totalAssigned < numVertices) {
+        unsigned reducedSuperStep = superStep % staleness;
+        std::vector<std::set<VertexType>> &stepProcReady = procReady[reducedSuperStep];
+        std::vector<VertexType> &stepFutureReady = futureReady[reducedSuperStep];
+
+        VertexType limit = params_.minSuperstepSize_;
+        double bestScore = 0;
+        double bestParallelism = 0;
+
+        typename std::set<VertexType>::const_iterator currentlyReadyIter;
+        typename std::set<VertexType>::const_iterator bestcurrentlyReadyIter;
+
+        bool continueSuperstepAttemps = true;
+
+        while (continueSuperstepAttemps) {
+            for (unsigned proc = 0; proc < p; proc++) {
+                newAssignments[proc].clear();
+            }
+            stepFutureReady.clear();
+
+            currentlyReadyIter = currentlyReady.cbegin();
+
+            VertexType newTotalAssigned = 0;
+            VWorkwT<GraphT> weightLimit = 0;
+            VWorkwT<GraphT> totalWeightAssigned = 0;
+
+            // Processor 0
+            constexpr unsigned proc0{0U};
+            while (newAssignments[proc0].size() < limit) {
+                VertexType chosenNode = std::numeric_limits<VertexType>::max();
+                {
+                    const auto procReadyIt = stepProcReady[proc0].cbegin();
+                    if (procReadyIt != stepProcReady[proc0].cend()) {
+                        chosenNode = *procReadyIt;
+                        stepProcReady[proc0].erase(procReadyIt);
+                    } else if (currentlyReadyIter != currentlyReady.cend()) {
+                        chosenNode = *currentlyReadyIter;
+                        ++currentlyReadyIter;
+                    } else {
+                        break;
+                    }
+                }
+
+                newAssignments[proc0].push_back(chosenNode);
+                schedule.SetAssignedProcessor(chosenNode, proc0);
+                ++newTotalAssigned;
+                weightLimit += graph.VertexWorkWeight(chosenNode);
+
+                for (const VertexType &succ : graph.Children(chosenNode)) {
+                    if (--predec[succ] == 0) {
+                        unsigned earliest = 0U;
+                        bool differentProcParent = false;
+                        for (const VertexType &par : graph.Parents(succ)) {
+                            const bool differentProc = (schedule.AssignedProcessor(par) != proc0);
+                            differentProcParent |= differentProc;
+                            earliest = std::max(earliest, static_cast<unsigned>(differentProc) * schedule.AssignedSuperStep(par));
+                        }
+                        earliest += static_cast<unsigned>(differentProcParent) * staleness;
+
+                        if (earliest <= superStep) {
+                            stepProcReady[proc0].emplace(succ);
+                        } else if (earliest < superStep + staleness) {
+                            procReady[earliest % staleness][proc0].emplace(succ);
+                        } else {
+                            stepFutureReady.emplace_back(succ);
+                        }
+                    }
+                }
+            } // end while assigning
+
+            totalWeightAssigned += weightLimit;
+
+            // Processors 1 through P-1
+            for (unsigned proc = 1U; proc < numProcs; ++proc) {
+                VWorkwT<GraphT> currentWeightAssigned = 0;
+                while (currentWeightAssigned < weightLimit) {
+                    VertexType chosenNode = std::numeric_limits<VertexType>::max();
+                    {
+                        const auto procReadyIt = stepProcReady[proc].cbegin();
+                        if (procReadyIt != stepProcReady[proc].cend()) {
+                            chosenNode = *procReadyIt;
+                            stepProcReady[proc].erase(procReadyIt);
+                        } else if (currentlyReadyIter != currentlyReady.cend()) {
+                            chosenNode = *currentlyReadyIter;
+                            ++currentlyReadyIter;
+                        } else {
+                            break;
+                        }
+                    }
+
+                    newAssignments[proc].push_back(chosenNode);
+                    schedule.SetAssignedProcessor(chosenNode, proc);
+                    ++newTotalAssigned;
+                    currentWeightAssigned += graph.VertexWorkWeight(chosenNode);
+
+                    for (const VertexType &succ : graph.Children(chosenNode)) {
+                        if (--predec[succ] == 0) {
+                            unsigned earliest = 0U;
+                            bool differentProcParent = false;
+                            for (const VertexType &par : graph.Parents(succ)) {
+                                const bool differentProc = (schedule.AssignedProcessor(par) != proc);
+                                differentProcParent |= differentProc;
+                                earliest
+                                    = std::max(earliest, static_cast<unsigned>(differentProc) * schedule.AssignedSuperStep(par));
+                            }
+                            earliest += static_cast<unsigned>(differentProcParent) * staleness;
+
+                            if (earliest <= superStep) {
+                                stepProcReady[proc].emplace(succ);
+                            } else if (earliest < superStep + staleness) {
+                                procReady[earliest % staleness][proc].emplace(succ);
+                            } else {
+                                stepFutureReady.emplace_back(succ);
+                            }
+                        }
+                    }
+                } // end while assigning
+                weightLimit = std::max(weightLimit, currentWeightAssigned);
+                totalWeightAssigned += currentWeightAssigned;
+            } // end processor loops
+
+            bool acceptStep = false;
+        }
+    }
+
+    return ReturnStatus::OSP_SUCCESS;
+}
+
+}    // end namespace osp

From 74a176d4470d0ab2f70d0715236558822fbf251e Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Mon, 9 Feb 2026 16:52:47 +0100
Subject: [PATCH 02/13] GrowLocalSSP: use deque/heap ready lists and add superstep acceptance logic

---
 .../GreedySchedulers/GrowLocalMaxBsp.hpp      | 203 +++++++++++++++---
 1 file changed, 170 insertions(+), 33 deletions(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index fae4df84..e65a000a 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -20,7 +20,12 @@ limitations under the License.
 
 #include <algorithm>
 #include <array>
+#include <deque>
+#include <iterator>
 #include <limits>
+#include <queue>
+#include <set>
+#include <utility>
 #include <vector>
 
 #include "osp/bsp/scheduler/MaxBspScheduler.hpp"
@@ -60,54 +65,89 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
     const VertexType numVertices = graph.NumVertices();
     const unsigned numProcs = instance.NumberOfProcessors();
 
-    std::set<VertexType> currentlyReady;
+    std::deque<VertexType> currentlyReady;    // vertices ready in current superstep
 
-    std::array<std::vector<VertexType>, staleness> futureReady;
-    std::vector<VertexType> bestFutureReady;
+    std::array<std::deque<VertexType>, staleness> futureReady;
+    // For i = 1,2,..,staleness, the vertices in futureReady[(superstep + i) % staleness] becomes ready globally in superstep + i
+    std::deque<VertexType> bestFutureReady;
+    // vertices to be added to futureReady[superstep % staleness] which become ready globally in superstep + staleness
 
-    std::array<std::vector<std::set<VertexType>>, staleness> procReady(numProcs);
-    std::vector<std::set<VertexType>> bestProcReady(numProcs);
+    std::vector<std::vector<std::pair<VertexType, unsigned>>> currentProcReadyHeaps(numProcs);
+    std::vector<std::vector<std::pair<VertexType, unsigned>>> bestCurrentProcReadyHeaps(numProcs);
+
+    std::array<std::vector<std::vector<std::pair<VertexType, unsigned>>>, staleness> procReady;
+    // For i = 0,1,2,..,staleness-1 and p processor, the vertices in procReady[(superstep + i) % staleness][p] are ready locally
+    // in superstep + i on processor p
+    std::array<std::vector<std::vector<std::pair<VertexType, unsigned>>>, staleness> procReadyAdditions;
+    std::array<std::vector<std::vector<std::pair<VertexType, unsigned>>>, staleness> bestProcReadyAdditions;
+
+    for (auto &arrVal : procReady) {
+        arrVal = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
+    }
+    for (auto &arrVal : procReadyAdditions) {
+        arrVal = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
+    }
+    for (auto &arrVal : bestProcReadyAdditions) {
+        arrVal = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
+    }
 
     std::vector<VertexType> predec(numVertices);
     for (const auto vert : graph.Vertices()) {
         predec[vert] = graph.InDegree(vert);
         if (predec[vert] == 0U) {
-            currentlyReady.insert(currentlyReady.end(), vert);
+            currentlyReady.emplace_back(vert);
         }
     }
 
     std::vector<std::vector<VertexType>> newAssignments(numProcs);
     std::vector<std::vector<VertexType>> bestNewAssignments(numProcs);
 
-    // const VWorkwT<GraphT> minWeightParallelCheck = params_.syncCostMultiplierParallelCheck_ * instance.SynchronisationCosts();
-    // const VWorkwT<GraphT> minSuperstepWeight = params_.syncCostMultiplierMinSuperstepWeight_ * instance.SynchronisationCosts();
-    // double desiredParallelism = static_cast<double>(numProcs);
+    const VWorkwT<GraphT> minWeightParallelCheck = params_.syncCostMultiplierParallelCheck_ * instance.SynchronisationCosts();
+    const VWorkwT<GraphT> minSuperstepWeight = params_.syncCostMultiplierMinSuperstepWeight_ * instance.SynchronisationCosts();
+
+    double desiredParallelism = static_cast<double>(numProcs);
 
     VertexType totalAssigned = 0;
     unsigned superStep = 0U;
 
     while (totalAssigned < numVertices) {
-        unsigned reducedSuperStep = superStep % staleness;
-        std::vector<std::set<VertexType>> &stepProcReady = procReady[reducedSuperStep];
-        std::vector<VertexType> &stepFutureReady = futureReady[reducedSuperStep];
+        const unsigned reducedSuperStep = superStep % staleness;
+
+        std::deque<VertexType> &stepFutureReady = futureReady[reducedSuperStep];
+        std::sort(stepFutureReady);
+        const std::size_t lengthCurrentlyReady = currentlyReady.size();
+        currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end());
+        std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end());
+
+        std::vector<std::vector<VertexType>> &stepProcReady = procReady[reducedSuperStep];
+        for (auto &procHeap : stepProcReady) {
+            std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>);    // min heap
+        }
 
         VertexType limit = params_.minSuperstepSize_;
-        double bestScore = 0;
-        double bestParallelism = 0;
+        double bestScore = 0.0;
+        double bestParallelism = 0.0;
 
-        typename std::set<VertexType>::const_iterator currentlyReadyIter;
-        typename std::set<VertexType>::const_iterator bestcurrentlyReadyIter;
+        typename std::deque<VertexType>::const_iterator currentlyReadyIter;
+        typename std::deque<VertexType>::const_iterator bestcurrentlyReadyIter;
 
         bool continueSuperstepAttemps = true;
 
         while (continueSuperstepAttemps) {
-            for (unsigned proc = 0; proc < p; proc++) {
-                newAssignments[proc].clear();
+            for (auto &procAssignments : newAssignments) {
+                procAssignments.clear();
             }
             stepFutureReady.clear();
+            currentProcReadyHeaps = stepProcReady;
 
             currentlyReadyIter = currentlyReady.cbegin();
 
+            for (auto &stepProcReadyAdditions : procReadyAdditions) {
+                for (auto &localStepProcReadyAdditions : stepProcReadyAdditions) {
+                    localStepProcReadyAdditions.clear();
+                }
+            }
+
             VertexType newTotalAssigned = 0;
             VWorkwT<GraphT> weightLimit = 0;
             VWorkwT<GraphT> totalWeightAssigned = 0;
@@ -115,12 +155,13 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
             // Processor 0
             constexpr unsigned proc0{0U};
             while (newAssignments[proc0].size() < limit) {
+                std::vector<std::pair<VertexIdxT, unsigned>> &proc0Heap = currentProcReadyHeaps[proc0];
                 VertexType chosenNode = std::numeric_limits<VertexType>::max();
                 {
-                    const auto procReadyIt = stepProcReady[proc0].cbegin();
-                    if (procReadyIt != stepProcReady[proc0].cend()) {
-                        chosenNode = *procReadyIt;
-                        stepProcReady[proc0].erase(procReadyIt);
+                    if (proc0Heap.size() != 0U) {
+                        std::pop_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>);
+                        chosenNode = proc0Heap.back().first;
+                        proc0Heap.pop_back();
                     } else if (currentlyReadyIter != currentlyReady.cend()) {
                         chosenNode = *currentlyReadyIter;
                         ++currentlyReadyIter;
@@ -131,6 +172,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
                 newAssignments[proc0].push_back(chosenNode);
                 schedule.SetAssignedProcessor(chosenNode, proc0);
+                schedule.SetAssignedSuperstepNoUpdateNumSuperstep(chosenNode, superStep);
                 ++newTotalAssigned;
                 weightLimit += graph.VertexWorkWeight(chosenNode);
 
@@ -146,15 +188,16 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
                         earliest += static_cast<unsigned>(differentProcParent) * staleness;
 
                         if (earliest <= superStep) {
-                            stepProcReady[proc0].emplace(succ);
+                            proc0Heap.emplace_back(succ, superStep + staleness);
+                            std::push_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>);
                         } else if (earliest < superStep + staleness) {
-                            procReady[earliest % staleness][proc0].emplace(succ);
+                            procReadyAdditions[earliest % staleness][proc0].emplace(succ, superStep + staleness);
                         } else {
                             stepFutureReady.emplace_back(succ);
                         }
                     }
                 }
-            } // end while assigning
+            }    // end while assigning
 
             totalWeightAssigned += weightLimit;
 
@@ -162,12 +205,13 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
             for (unsigned proc = 1U; proc < numProcs; ++proc) {
                 VWorkwT<GraphT> currentWeightAssigned = 0;
                 while (currentWeightAssigned < weightLimit) {
+                    std::vector<std::pair<VertexIdxT, unsigned>> &procHeap = currentProcReadyHeaps[proc];
                     VertexType chosenNode = std::numeric_limits<VertexType>::max();
                     {
-                        const auto procReadyIt = stepProcReady[proc].cbegin();
-                        if (procReadyIt != stepProcReady[proc].cend()) {
-                            chosenNode = *procReadyIt;
-                            stepProcReady[proc].erase(procReadyIt);
+                        if (procHeap.size() != 0U) {
+                            std::pop_heap(procHeap.begin(), procHeap.end(), std::greater<>);
+                            chosenNode = procHeap.back().first;
+                            procHeap.pop_back();
                         } else if (currentlyReadyIter != currentlyReady.cend()) {
                             chosenNode = *currentlyReadyIter;
                             ++currentlyReadyIter;
@@ -178,6 +222,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
                     newAssignments[proc].push_back(chosenNode);
                     schedule.SetAssignedProcessor(chosenNode, proc);
+                    schedule.SetAssignedSuperstepNoUpdateNumSuperstep(chosenNode, superStep);
                     ++newTotalAssigned;
                     currentWeightAssigned += graph.VertexWorkWeight(chosenNode);
 
@@ -194,21 +239,113 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
                             earliest += static_cast<unsigned>(differentProcParent) * staleness;
 
                             if (earliest <= superStep) {
-                                stepProcReady[proc].emplace(succ);
+                                procHeap.emplace_back(succ, superStep + staleness);
+                                std::push_heap(procHeap.begin(), procHeap.end(), std::greater<>);
                             } else if (earliest < superStep + staleness) {
-                                procReady[earliest % staleness][proc].emplace(succ);
+                                procReadyAdditions[earliest % staleness][proc].emplace(succ, superStep + staleness);
                             } else {
                                 stepFutureReady.emplace_back(succ);
                             }
                         }
                     }
-                } // end while assigning
+                }    // end while assigning
                 weightLimit = std::max(weightLimit, currentWeightAssigned);
                 totalWeightAssigned += currentWeightAssigned;
-            } // end processor loops
+            }    // end processor loops
 
             bool acceptStep = false;
+
+            double score
+                = static_cast<double>(totalWeightAssigned) / static_cast<double>(weightLimit + instance.SynchronisationCosts());
+            double parallelism = 0;
+            if (weightLimit > 0) {
+                parallelism = static_cast<double>(totalWeightAssigned) / static_cast<double>(weightLimit);
+            }
+
+            if (score > 0.99 * bestScore) {    // It is possible to make this less strict, i.e. score > 0.98 * best_score.
+                                               // The purpose of this would be to encourage larger supersteps.
+                bestScore = std::max(bestScore, score);
+                bestParallelism = parallelism;
+                acceptStep = true;
+            } else {
+                continueSuperstepAttemps = false;
+            }
+
+            if (weightLimit >= minWeightParallelCheck) {
+                if (parallelism < std::max(2.0, 0.8 * desiredParallelism)) {
+                    continueSuperstepAttemps = false;
+                }
+            }
+
+            if (weightLimit <= minSuperstepWeight) {
+                continueSuperstepAttemps = true;
+                if (totalAssigned + newTotalAssigned == n) {
+                    acceptStep = true;
+                    continueSuperstepAttemps = false;
+                }
+            }
+
+            if (totalAssigned + newTotalAssigned == n) {
+                continueSuperstepAttemps = false;
+            }
+
+            // Undo predec decreases
+            for (const auto &newLocalAssignments : newAssignments) {
+                for (const VertexType &node : newLocalAssignments) {
+                    for (const VertexType &succ : graph.Children(node)) {
+                        ++predec[succ];
+                    }
+                }
+            }
+
+            if (acceptStep) {
+                std::swap(bestFutureReady, stepFutureReady);
+                std::swap(bestProcReadyAdditions, procReadyAdditions);
+                std::swap(bestcurrentlyReadyIter, currentlyReadyIter);
+                std::swap(bestNewAssignments, newAssignments);
+                std::swap(bestCurrentProcReadyHeaps, currentProcReadyHeaps);
+            }
+
+            limit++;
+            limit += (limit / 2);
+        }
+
+        // apply best iteration
+        currentlyReady.erase(currentlyReady.begin(), bestcurrentlyReadyIter);
+        std::swap(futureReady[reducedSuperStep], bestFutureReady);
+
+        ++superStep;
+        for (unsigned proc = 0U; proc < numProcs; ++proc) {
+            for (const auto &vertStepPair : bestCurrentProcReadyHeaps[proc]) {
+                if (vertStepPair.second <= superStep) {
+                    futureReady[superStep % staleness].emplace_back(vertStepPair.first);
+                } else {
+                    procReady[superStep % staleness][proc].emplace_back(vertStepPair);
+                }
+            }
+        }
+
+        for (std::size_t stepInd = 0U; stepInd < staleness; ++stepInd) {
+            for (unsigned proc = 0U; proc < numProcs; ++proc) {
+                procReady[stepInd][proc].insert(procReady[stepInd][proc].end(),
+                                                bestProcReadyAdditions[stepInd][proc].begin(),
+                                                bestProcReadyAdditions[stepInd][proc].end());
+            }
         }
+
+        for (unsigned proc = 0U; proc < numProcs; ++proc) {
+            totalAssigned += bestNewAssignments[proc].size();
+            for (const VertexType &node : bestNewAssignments[proc]) {
+                schedule.SetAssignedProcessor(node, proc);
+
+                for (const VertexType &succ : graph.Children(node)) {
+                    --predec[succ];
+                }
+            }
+        }
+
+        desiredParallelism = (0.3 * desiredParallelism) + (0.6 * bestParallelism)
+                             + (0.1 * static_cast<double>(numProcs));    // weights should sum up to one
     }
 
     return ReturnStatus::OSP_SUCCESS;

From 6c0d93bca9e99ebb904b7db71a56882a9883ef78 Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Mon, 9 Feb 2026 17:26:30 +0100
Subject: [PATCH 03/13] GrowLocalSSP: fix compile errors, add BspSchedule overload, update tests

---
 .../GreedySchedulers/GrowLocalMaxBsp.hpp      | 33 ++++++++++++-------
 tests/max_bsp_schedulers.cpp                  | 24 +++++++++++---
 2 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index e65a000a..3ee726a1 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -49,15 +49,24 @@ class GrowLocalSSP : public MaxBspScheduler<GraphT> {
   private:
     using VertexType = VertexIdxT<GraphT>;
 
-    constexpr std::size_t staleness{2U};
+    static constexpr std::size_t staleness{2U};
     GrowLocalSSPParams<VertexIdxT<GraphT>, VWorkwT<GraphT>> params_;
 
   public:
-    ReturnStatus ComputeSchedule(MaxBspSchedule<GraphT> &schedule);
+    ReturnStatus ComputeSchedule(BspSchedule<GraphT> &schedule) override;
+    ReturnStatus ComputeSchedule(MaxBspSchedule<GraphT> &schedule) override;
 
     std::string GetScheduleName() const override { return "GrowLocalSSP"; }
 };
 
+template <typename GraphT>
+ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(BspSchedule<GraphT> &schedule) {
+    MaxBspSchedule<GraphT> tmpSched(schedule.GetInstance());
+    ReturnStatus status = ComputeSchedule(tmpSched);
+    schedule = tmpSched;
+    return status;
+}
+
 template <typename GraphT>
 ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &schedule) {
     const BspInstance<GraphT> &instance = schedule.GetInstance();
@@ -121,7 +130,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
         std::vector<std::vector<VertexType>> &stepProcReady = procReady[reducedSuperStep];
         for (auto &procHeap : stepProcReady) {
-            std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>);    // min heap
+            std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>{});    // min heap
         }
 
         VertexType limit = params_.minSuperstepSize_;
@@ -155,11 +164,11 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
             // Processor 0
             constexpr unsigned proc0{0U};
             while (newAssignments[proc0].size() < limit) {
-                std::vector<std::pair<VertexIdxT, unsigned>> &proc0Heap = currentProcReadyHeaps[proc0];
+                std::vector<std::pair<VertexType, unsigned>> &proc0Heap = currentProcReadyHeaps[proc0];
                 VertexType chosenNode = std::numeric_limits<VertexType>::max();
                 {
                     if (proc0Heap.size() != 0U) {
-                        std::pop_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>);
+                        std::pop_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>{});
                         chosenNode = proc0Heap.back().first;
                         proc0Heap.pop_back();
                     } else if (currentlyReadyIter != currentlyReady.cend()) {
@@ -189,7 +198,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
                         if (earliest <= superStep) {
                             proc0Heap.emplace_back(succ, superStep + staleness);
-                            std::push_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>);
+                            std::push_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>{});
                         } else if (earliest < superStep + staleness) {
                             procReadyAdditions[earliest % staleness][proc0].emplace(succ, superStep + staleness);
                         } else {
@@ -205,11 +214,11 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
             for (unsigned proc = 1U; proc < numProcs; ++proc) {
                 VWorkwT<GraphT> currentWeightAssigned = 0;
                 while (currentWeightAssigned < weightLimit) {
-                    std::vector<std::pair<VertexIdxT, unsigned>> &procHeap = currentProcReadyHeaps[proc];
+                    std::vector<std::pair<VertexType, unsigned>> &procHeap = currentProcReadyHeaps[proc];
                     VertexType chosenNode = std::numeric_limits<VertexType>::max();
                     {
                         if (procHeap.size() != 0U) {
-                            std::pop_heap(procHeap.begin(), procHeap.end(), std::greater<>);
+                            std::pop_heap(procHeap.begin(), procHeap.end(), std::greater<>{});
                             chosenNode = procHeap.back().first;
                             procHeap.pop_back();
                         } else if (currentlyReadyIter != currentlyReady.cend()) {
@@ -240,7 +249,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
                             if (earliest <= superStep) {
                                 procHeap.emplace_back(succ, superStep + staleness);
-                                std::push_heap(procHeap.begin(), procHeap.end(), std::greater<>);
+                                std::push_heap(procHeap.begin(), procHeap.end(), std::greater<>{});
                             } else if (earliest < superStep + staleness) {
                                 procReadyAdditions[earliest % staleness][proc].emplace(succ, superStep + staleness);
                             } else {
@@ -279,13 +288,13 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
             if (weightLimit <= minSuperstepWeight) {
                 continueSuperstepAttemps = true;
-                if (totalAssigned + newTotalAssigned == n) {
+                if (totalAssigned + newTotalAssigned == numVertices) {
                     acceptStep = true;
                     continueSuperstepAttemps = false;
                 }
             }
 
-            if (totalAssigned + newTotalAssigned == n) {
+            if (totalAssigned + newTotalAssigned == numVertices) {
                 continueSuperstepAttemps = false;
             }
 
@@ -348,6 +357,8 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
                              + (0.1 * static_cast<double>(numProcs));    // weights should sum up to one
     }
 
+    schedule.SetNumberOfSupersteps(superStep);
+
     return ReturnStatus::OSP_SUCCESS;
 }
 
diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp
index 908b0493..6e33487c 100644
--- a/tests/max_bsp_schedulers.cpp
+++ b/tests/max_bsp_schedulers.cpp
@@ -27,7 +27,9 @@ limitations under the License.
 #include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp"
+#include "osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp"
 #include "osp/bsp/scheduler/MaxBspScheduler.hpp"
+#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "test_graphs.hpp"
@@ -61,17 +63,19 @@ void RunTest(Scheduler<GraphT> *testScheduler) {
             std::cout << "Graph: " << nameGraph << std::endl;
             std::cout << "Architecture: " << nameMachine << std::endl;
 
-            BspInstance<GraphT> instance;
+            ComputationalDagEdgeIdxVectorImplDefIntT graph;
+            BspArchitecture<GraphT> arch;
 
-            bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag());
-            bool statusArchitecture
-                = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture());
+            bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), graph);
+            bool statusArchitecture = file_reader::ReadBspArchitecture((cwd / filenameMachine).string(), arch);
 
             if (!statusGraph || !statusArchitecture) {
                 std::cout << "Reading files failed." << std::endl;
                 BOOST_CHECK(false);
             }
 
+            BspInstance<GraphT> instance(graph, arch);
+
             BspSchedule<GraphT> schedule(instance);
             const auto result = testScheduler->ComputeSchedule(schedule);
 
@@ -142,3 +146,15 @@ BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerMaxBspScheduleLargeTest) {
     GreedyVarianceSspScheduler<ComputationalDagEdgeIdxVectorImplDefIntT> test;
     RunTestMaxBsp(&test);
 }
+
+// Tests ComputeSchedule(BspSchedule&) → staleness = 1
+BOOST_AUTO_TEST_CASE(GrowLocalSSPBspScheduleLargeTest) {
+    GrowLocalSSP<CompactSparseGraph<false>> test;
+    RunTest(&test);
+}
+
+// Tests ComputeSchedule(MaxBspSchedule&) → staleness = 2
+BOOST_AUTO_TEST_CASE(GrowLocalSSPMaxBspScheduleLargeTest) {
+    GrowLocalSSP<CompactSparseGraph<false>> test;
+    RunTestMaxBsp(&test);
+}
\ No newline at end of file

From 9204a06cd75220660c15ae036648240d883bcd30 Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Mon, 9 Feb 2026 17:52:54 +0100
Subject: [PATCH 04/13] compiles

---
 include/osp/bsp/model/BspInstance.hpp           | 16 +++++++++++++---
 .../GreedySchedulers/GrowLocalMaxBsp.hpp        | 16 ++++++++--------
 tests/max_bsp_schedulers.cpp                    | 17 ++++++++++-------
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp
index 358947e9..0fcd8040 100644
--- a/include/osp/bsp/model/BspInstance.hpp
+++ b/include/osp/bsp/model/BspInstance.hpp
@@ -107,10 +107,20 @@ class BspInstance {
      * @param cdag The computational DAG for the instance.
      * @param architecture The BSP architecture for the instance.
      */
-    BspInstance(const GraphT &cdag,
+    template <typename OtherGraphT>
+    BspInstance(const OtherGraphT &cdag,
                 const BspArchitecture<GraphT> &architecture,
                 std::vector<std::vector<bool>> nodeProcessorCompatibility = std::vector<std::vector<bool>>({{true}}))
-        : cdag_(cdag), architecture_(architecture), nodeProcessorCompatibility_(nodeProcessorCompatibility) {}
+        : cdag_(cdag), architecture_(architecture), nodeProcessorCompatibility_(nodeProcessorCompatibility) {
+        static_assert(std::is_same_v<VMemwT<GraphT>, VMemwT<OtherGraphT>>,
+                      "BspInstance: GraphT and OtherGraphT must have the same memory weight type.");
+
+        static_assert(std::is_same_v<VCommwT<GraphT>, VCommwT<OtherGraphT>>,
+                      "BspInstance: GraphT and OtherGraphT must have the same communication weight type.");
+
+        static_assert(std::is_same_v<VTypeT<GraphT>, VTypeT<OtherGraphT>>,
+                      "BspInstance: GraphT and OtherGraphT must have the same processor type.");
+    }
 
     /**
      * @brief Constructs a BspInstance object with the specified computational DAG and BSP architecture.
@@ -319,7 +329,7 @@ class BspInstance {
     bool HasAnyTypeRestrictions() const {
         for (VertexTypeTOrDefault node_type = 0; node_type < nodeProcessorCompatibility_.size(); ++node_type) {
             for (VertexTypeTOrDefault proc_type = 0; proc_type < nodeProcessorCompatibility_[node_type].size(); ++proc_type) {
-                if(!nodeProcessorCompatibility_[node_type][proc_type]) {
+                if (!nodeProcessorCompatibility_[node_type][proc_type]) {
                     return true;
                 }
             }
diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index 3ee726a1..0cc191a8 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -49,7 +49,7 @@ class GrowLocalSSP : public MaxBspScheduler<GraphT> {
   private:
     using VertexType = VertexIdxT<GraphT>;
 
-    static constexpr std::size_t staleness{2U};
+    static constexpr unsigned staleness{2U};
     GrowLocalSSPParams<VertexIdxT<GraphT>, VWorkwT<GraphT>> params_;
 
   public:
@@ -123,12 +123,12 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
         const unsigned reducedSuperStep = superStep % staleness;
 
         std::deque<VertexType> &stepFutureReady = futureReady[reducedSuperStep];
-        std::sort(stepFutureReady);
-        const std::size_t lengthCurrentlyReady = currentlyReady.size();
+        std::sort(stepFutureReady.begin(), stepFutureReady.end(), std::less<>{});
+        const typename std::deque<VertexType>::difference_type lengthCurrentlyReady = std::distance(currentlyReady.begin(), currentlyReady.end());
         currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end());
         std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end());
 
-        std::vector<std::vector<VertexType>> &stepProcReady = procReady[reducedSuperStep];
+        std::vector<std::vector<std::pair<VertexType, unsigned>>> &stepProcReady = procReady[reducedSuperStep];
         for (auto &procHeap : stepProcReady) {
             std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>{});    // min heap
         }
@@ -192,7 +192,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
                         for (const VertexType &par : graph.Parents(succ)) {
                             const bool differentProc = (schedule.AssignedProcessor(par) != proc0);
                             differentProcParent |= differentProc;
-                            earliest = std::max(earliest, static_cast<unsigned>(differentProc) * schedule.AssignedSuperStep(par));
+                            earliest = std::max(earliest, static_cast<unsigned>(differentProc) * schedule.AssignedSuperstep(par));
                         }
                         earliest += static_cast<unsigned>(differentProcParent) * staleness;
 
@@ -200,7 +200,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
                             proc0Heap.emplace_back(succ, superStep + staleness);
                             std::push_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>{});
                         } else if (earliest < superStep + staleness) {
-                            procReadyAdditions[earliest % staleness][proc0].emplace(succ, superStep + staleness);
+                            procReadyAdditions[earliest % staleness][proc0].emplace_back(succ, superStep + staleness);
                         } else {
                             stepFutureReady.emplace_back(succ);
                         }
@@ -243,7 +243,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
                                 const bool differentProc = (schedule.AssignedProcessor(par) != proc);
                                 differentProcParent |= differentProc;
                                 earliest
-                                    = std::max(earliest, static_cast<unsigned>(differentProc) * schedule.AssignedSuperStep(par));
+                                    = std::max(earliest, static_cast<unsigned>(differentProc) * schedule.AssignedSuperstep(par));
                             }
                             earliest += static_cast<unsigned>(differentProcParent) * staleness;
 
@@ -251,7 +251,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
                                 procHeap.emplace_back(succ, superStep + staleness);
                                 std::push_heap(procHeap.begin(), procHeap.end(), std::greater<>{});
                             } else if (earliest < superStep + staleness) {
-                                procReadyAdditions[earliest % staleness][proc].emplace(succ, superStep + staleness);
+                                procReadyAdditions[earliest % staleness][proc].emplace_back(succ, superStep + staleness);
                             } else {
                                 stepFutureReady.emplace_back(succ);
                             }
diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp
index 6e33487c..60f2b55d 100644
--- a/tests/max_bsp_schedulers.cpp
+++ b/tests/max_bsp_schedulers.cpp
@@ -36,6 +36,9 @@ limitations under the License.
 
 using namespace osp;
 
+using VImpl1 = CDagVertexImpl<std::size_t, unsigned, unsigned, unsigned, unsigned>;
+using VImpl2 = CDagVertexImpl<uint32_t, unsigned, unsigned, unsigned, unsigned>;
+
 std::vector<std::string> TestArchitectures() { return {"data/machine_params/p3.arch"}; }
 
 template <typename GraphT>
@@ -63,7 +66,7 @@ void RunTest(Scheduler<GraphT> *testScheduler) {
             std::cout << "Graph: " << nameGraph << std::endl;
             std::cout << "Architecture: " << nameMachine << std::endl;
 
-            ComputationalDagEdgeIdxVectorImplDefIntT graph;
+            ComputationalDagVectorImpl<VImpl1> graph;
             BspArchitecture<GraphT> arch;
 
             bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), graph);
@@ -108,7 +111,7 @@ void RunTestMaxBsp(MaxBspScheduler<GraphT> *testScheduler) {
                       << "Graph: " << nameGraph << std::endl
                       << "Architecture: " << nameMachine << std::endl;
 
-            ComputationalDagEdgeIdxVectorImplDefIntT graph;
+            ComputationalDagVectorImpl<VImpl1> graph;
             BspArchitecture<GraphT> arch;
 
             bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), graph);
@@ -131,30 +134,30 @@ void RunTestMaxBsp(MaxBspScheduler<GraphT> *testScheduler) {
 
 // Tests ComputeSchedule(BspSchedule&) → staleness = 1
 BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerTestVectorImpl) {
-    GreedyVarianceSspScheduler<ComputationalDagVectorImplDefUnsignedT> test;
+    GreedyVarianceSspScheduler<ComputationalDagVectorImpl<VImpl1>> test;
     RunTest(&test);
 }
 
 // Tests ComputeSchedule(BspSchedule&) → staleness = 1 (different graph impl)
 BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerTestEdgeIdxImpl) {
-    GreedyVarianceSspScheduler<ComputationalDagEdgeIdxVectorImplDefT> test;
+    GreedyVarianceSspScheduler<ComputationalDagVectorImpl<VImpl2>> test;
     RunTest(&test);
 }
 
 // Tests ComputeSchedule(MaxBspSchedule&) → staleness = 2
 BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerMaxBspScheduleLargeTest) {
-    GreedyVarianceSspScheduler<ComputationalDagEdgeIdxVectorImplDefIntT> test;
+    GreedyVarianceSspScheduler<ComputationalDagVectorImpl<VImpl1>> test;
     RunTestMaxBsp(&test);
 }
 
 // Tests ComputeSchedule(BspSchedule&) → staleness = 1
 BOOST_AUTO_TEST_CASE(GrowLocalSSPBspScheduleLargeTest) {
-    GrowLocalSSP<CompactSparseGraph<false>> test;
+    GrowLocalSSP<CompactSparseGraph<true, true, true, true, true>> test;
     RunTest(&test);
 }
 
 // Tests ComputeSchedule(MaxBspSchedule&) → staleness = 2
 BOOST_AUTO_TEST_CASE(GrowLocalSSPMaxBspScheduleLargeTest) {
-    GrowLocalSSP<CompactSparseGraph<false>> test;
+    GrowLocalSSP<CompactSparseGraph<true, true, true, true, true>> test;
     RunTestMaxBsp(&test);
 }
\ No newline at end of file

From 26f83efa75ece9f97560a46ac3ab2921df4f7d50 Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Tue, 10 Feb 2026 08:53:45 +0100
Subject: [PATCH 05/13] some fixes

---
 .../GreedySchedulers/GrowLocalMaxBsp.hpp      | 11 +++++---
 tests/max_bsp_schedulers.cpp                  | 25 ++++++++++++++++---
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index 0cc191a8..6c58f79d 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -124,7 +124,8 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
         std::deque<VertexType> &stepFutureReady = futureReady[reducedSuperStep];
         std::sort(stepFutureReady.begin(), stepFutureReady.end(), std::less<>{});
-        const typename std::deque<VertexType>::difference_type lengthCurrentlyReady = std::distance(currentlyReady.begin(), currentlyReady.end());
+        const typename std::deque<VertexType>::difference_type lengthCurrentlyReady
+            = std::distance(currentlyReady.begin(), currentlyReady.end());
         currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end());
         std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end());
 
@@ -134,7 +135,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
         }
 
         VertexType limit = params_.minSuperstepSize_;
-        double bestScore = 0.0;
+        double bestScore = std::numeric_limits<double>::lowest();
         double bestParallelism = 0.0;
 
         typename std::deque<VertexType>::const_iterator currentlyReadyIter;
@@ -266,7 +267,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
             double score
                 = static_cast<double>(totalWeightAssigned) / static_cast<double>(weightLimit + instance.SynchronisationCosts());
-            double parallelism = 0;
+            double parallelism = 0.0;
             if (weightLimit > 0) {
                 parallelism = static_cast<double>(totalWeightAssigned) / static_cast<double>(weightLimit);
             }
@@ -294,6 +295,10 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
                 }
             }
 
+            if (currentlyReadyIter == currentlyReady.cend()) {
+                continueSuperstepAttemps = false;
+            }
+
             if (totalAssigned + newTotalAssigned == numVertices) {
                 continueSuperstepAttemps = false;
             }
diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp
index 60f2b55d..e8a9721c 100644
--- a/tests/max_bsp_schedulers.cpp
+++ b/tests/max_bsp_schedulers.cpp
@@ -41,6 +41,23 @@ using VImpl2 = CDagVertexImpl<uint32_t, unsigned, unsigned, unsigned, unsigned>;
 
 std::vector<std::string> TestArchitectures() { return {"data/machine_params/p3.arch"}; }
 
+template <typename GraphT>
+void checkPrecedenceContraints(const BspSchedule<GraphT> &schedule, const unsigned staleness) {
+    for (const auto &v : schedule.GetInstance().GetComputationalDag().Vertices()) {
+        BOOST_CHECK_LT(schedule.AssignedSuperstep(v), schedule.NumberOfSupersteps());
+
+        for (const auto &chld : schedule.GetInstance().GetComputationalDag().Children(v)) {
+            const unsigned differentProcessors
+                = (schedule.AssignedProcessor(v) == schedule.AssignedProcessor(chld)) ? 0U : staleness;
+
+            BOOST_CHECK_LE(schedule.AssignedSuperstep(v) + differentProcessors, schedule.AssignedSuperstep(chld));
+            if (schedule.AssignedSuperstep(v) + differentProcessors > schedule.AssignedSuperstep(chld)) {
+                std::cout << "Vertex: " << v << " Child: " << chld << '\n';
+            }
+        }
+    }
+}
+
 template <typename GraphT>
 void RunTest(Scheduler<GraphT> *testScheduler) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
@@ -84,6 +101,7 @@ void RunTest(Scheduler<GraphT> *testScheduler) {
 
             BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result);
             BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints());
+            checkPrecedenceContraints(schedule, 1U);
         }
     }
 }
@@ -128,6 +146,7 @@ void RunTestMaxBsp(MaxBspScheduler<GraphT> *testScheduler) {
 
             BOOST_CHECK_EQUAL(result, ReturnStatus::OSP_SUCCESS);
             BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints());
+            checkPrecedenceContraints(schedule, 2U);
         }
     }
 }
@@ -152,12 +171,12 @@ BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerMaxBspScheduleLargeTest) {
 
 // Tests ComputeSchedule(BspSchedule&) → staleness = 1
 BOOST_AUTO_TEST_CASE(GrowLocalSSPBspScheduleLargeTest) {
-    GrowLocalSSP<CompactSparseGraph<true, true, true, true, true>> test;
+    GrowLocalSSP<CompactSparseGraph<false, true, true, true, true>> test;
     RunTest(&test);
 }
 
 // Tests ComputeSchedule(MaxBspSchedule&) → staleness = 2
 BOOST_AUTO_TEST_CASE(GrowLocalSSPMaxBspScheduleLargeTest) {
-    GrowLocalSSP<CompactSparseGraph<true, true, true, true, true>> test;
+    GrowLocalSSP<CompactSparseGraph<false, true, true, true, true>> test;
     RunTestMaxBsp(&test);
-}
\ No newline at end of file
+}

From 69f30c76d78671a41de8eb931b2707d33a00fa04 Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Tue, 10 Feb 2026 10:48:48 +0100
Subject: [PATCH 06/13] progress

---
 .../GreedySchedulers/GrowLocalMaxBsp.hpp      | 31 +++++++++----------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index 6c58f79d..cf979846 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -188,14 +188,12 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
                 for (const VertexType &succ : graph.Children(chosenNode)) {
                     if (--predec[succ] == 0) {
-                        unsigned earliest = 0U;
-                        bool differentProcParent = false;
+                        unsigned earliest = superStep;
                         for (const VertexType &par : graph.Parents(succ)) {
-                            const bool differentProc = (schedule.AssignedProcessor(par) != proc0);
-                            differentProcParent |= differentProc;
-                            earliest = std::max(earliest, static_cast<unsigned>(differentProc) * schedule.AssignedSuperstep(par));
+                            const bool sameProc = (schedule.AssignedProcessor(par) == proc0);
+                            const unsigned constraint = sameProc ? superStep : schedule.AssignedSuperstep(par) + staleness;
+                            earliest = std::max(earliest, constraint);
                         }
-                        earliest += static_cast<unsigned>(differentProcParent) * staleness;
 
                         if (earliest <= superStep) {
                             proc0Heap.emplace_back(succ, superStep + staleness);
@@ -238,15 +236,12 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
 
                     for (const VertexType &succ : graph.Children(chosenNode)) {
                         if (--predec[succ] == 0) {
-                            unsigned earliest = 0U;
-                            bool differentProcParent = false;
+                            unsigned earliest = superStep;
                             for (const VertexType &par : graph.Parents(succ)) {
-                                const bool differentProc = (schedule.AssignedProcessor(par) != proc);
-                                differentProcParent |= differentProc;
-                                earliest
-                                    = std::max(earliest, static_cast<unsigned>(differentProc) * schedule.AssignedSuperstep(par));
+                                const bool sameProc = (schedule.AssignedProcessor(par) == proc);
+                                const unsigned constraint = sameProc ? superStep : schedule.AssignedSuperstep(par) + staleness;
+                                earliest = std::max(earliest, constraint);
                             }
-                            earliest += static_cast<unsigned>(differentProcParent) * staleness;
 
                             if (earliest <= superStep) {
                                 procHeap.emplace_back(succ, superStep + staleness);
@@ -328,13 +323,13 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
         currentlyReady.erase(currentlyReady.begin(), bestcurrentlyReadyIter);
         std::swap(futureReady[reducedSuperStep], bestFutureReady);
 
-        ++superStep;
+        const unsigned nextSuperStep = superStep + 1U;
         for (unsigned proc = 0U; proc < numProcs; ++proc) {
             for (const auto &vertStepPair : bestCurrentProcReadyHeaps[proc]) {
-                if (vertStepPair.second <= superStep) {
-                    futureReady[superStep % staleness].emplace_back(vertStepPair.first);
+                if (vertStepPair.second <= nextSuperStep) {
+                    futureReady[nextSuperStep % staleness].emplace_back(vertStepPair.first);
                 } else {
-                    procReady[superStep % staleness][proc].emplace_back(vertStepPair);
+                    procReady[nextSuperStep % staleness][proc].emplace_back(vertStepPair);
                 }
             }
         }
@@ -351,6 +346,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
             totalAssigned += bestNewAssignments[proc].size();
             for (const VertexType &node : bestNewAssignments[proc]) {
                 schedule.SetAssignedProcessor(node, proc);
+                // schedule.SetAssignedSuperstepNoUpdateNumSuperstep(node, superStep);
 
                 for (const VertexType &succ : graph.Children(node)) {
                     --predec[succ];
@@ -358,6 +354,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
             }
         }
 
+        ++superStep;
         desiredParallelism = (0.3 * desiredParallelism) + (0.6 * bestParallelism)
                              + (0.1 * static_cast<double>(numProcs));    // weights should sum up to one
     }

From 519e75d2916a4ba2c4e6a3317fbe3e334cc3f6ac Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Tue, 10 Feb 2026 10:52:39 +0100
Subject: [PATCH 07/13] fixed bug

---
 .../scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp    |  4 ++++
 tests/max_bsp_schedulers.cpp                          | 11 ++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index cf979846..808597f7 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -323,6 +323,10 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
         currentlyReady.erase(currentlyReady.begin(), bestcurrentlyReadyIter);
         std::swap(futureReady[reducedSuperStep], bestFutureReady);
 
+        for (auto &localProcReady : procReady[reducedSuperStep]) {
+            localProcReady.clear();
+        }
+
         const unsigned nextSuperStep = superStep + 1U;
         for (unsigned proc = 0U; proc < numProcs; ++proc) {
             for (const auto &vertStepPair : bestCurrentProcReadyHeaps[proc]) {
diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp
index e8a9721c..46a53511 100644
--- a/tests/max_bsp_schedulers.cpp
+++ b/tests/max_bsp_schedulers.cpp
@@ -47,12 +47,13 @@ void checkPrecedenceContraints(const BspSchedule<GraphT> &schedule, const unsign
         BOOST_CHECK_LT(schedule.AssignedSuperstep(v), schedule.NumberOfSupersteps());
 
         for (const auto &chld : schedule.GetInstance().GetComputationalDag().Children(v)) {
-            const unsigned differentProcessors
-                = (schedule.AssignedProcessor(v) == schedule.AssignedProcessor(chld)) ? 0U : staleness;
+            const unsigned minStepGap = (schedule.AssignedProcessor(v) == schedule.AssignedProcessor(chld)) ? 0U : staleness;
 
-            BOOST_CHECK_LE(schedule.AssignedSuperstep(v) + differentProcessors, schedule.AssignedSuperstep(chld));
-            if (schedule.AssignedSuperstep(v) + differentProcessors > schedule.AssignedSuperstep(chld)) {
-                std::cout << "Vertex: " << v << " Child: " << chld << '\n';
+            BOOST_CHECK_LE(schedule.AssignedSuperstep(v) + minStepGap, schedule.AssignedSuperstep(chld));
+            if (schedule.AssignedSuperstep(v) + minStepGap > schedule.AssignedSuperstep(chld)) {
+                std::cout << "Vertex: " << v << " (S:" << schedule.AssignedSuperstep(v) << " P:" << schedule.AssignedProcessor(v)
+                          << ")" << " Child: " << chld << " (S:" << schedule.AssignedSuperstep(chld)
+                          << " P:" << schedule.AssignedProcessor(chld) << ")" << '\n';
             }
         }
     }

From 7e445bef75aaedc4eba7430847afd839a2b3e757 Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Tue, 10 Feb 2026 11:13:59 +0100
Subject: [PATCH 08/13] removed commented code

---
 include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index 808597f7..5f37937e 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -350,7 +350,6 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
             totalAssigned += bestNewAssignments[proc].size();
             for (const VertexType &node : bestNewAssignments[proc]) {
                 schedule.SetAssignedProcessor(node, proc);
-                // schedule.SetAssignedSuperstepNoUpdateNumSuperstep(node, superStep);
 
                 for (const VertexType &succ : graph.Children(node)) {
                     --predec[succ];

From 9215295a5c2975fb179bd39bbaa849c2418f01da Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Tue, 10 Feb 2026 11:36:38 +0100
Subject: [PATCH 09/13] removed restrictions

---
 .../osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index 5f37937e..05bcd53b 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -43,8 +43,6 @@ template <typename GraphT>
 class GrowLocalSSP : public MaxBspScheduler<GraphT> {
     static_assert(isDirectedGraphV<GraphT>);
     static_assert(hasVertexWeightsV<GraphT>);
-    static_assert(hasVerticesInTopOrderV<GraphT>);
-    static_assert(hasChildrenInVertexOrderV<GraphT>);
 
   private:
     using VertexType = VertexIdxT<GraphT>;
@@ -107,6 +105,9 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
             currentlyReady.emplace_back(vert);
         }
     }
+    if constexpr (not hasVerticesInTopOrderV<GraphT>) {
+        std::sort(currentlyReady.begin(), currentlyReady.end(), std::less<>{});
+    }
 
     std::vector<std::vector<VertexType>> newAssignments(numProcs);
     std::vector<std::vector<VertexType>> bestNewAssignments(numProcs);

From 63f9a7dea670251dc5a0e6d524926be2733afbea Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Tue, 10 Feb 2026 14:55:03 +0100
Subject: [PATCH 10/13] conditioning for next superstep

---
 .../scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp  | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index 05bcd53b..4625d2cd 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -130,6 +130,15 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
         currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end());
         std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end());
 
+        const typename std::deque<VertexType>::difference_type maxCurrentlyReadyUsage
+            = (staleness == 1U) ? std::distance(currentlyReady.begin(), currentlyReady.end())
+                                : ((std::distance(currentlyReady.begin(), currentlyReady.end())
+                                    + std::distance(futureReady[(superStep + 1U) % staleness].begin(),
+                                                    futureReady[(superStep + 1U) % staleness].end())
+                                    + 2)
+                                   / 3)
+                                      * 2;
+
         std::vector<std::vector<std::pair<VertexType, unsigned>>> &stepProcReady = procReady[reducedSuperStep];
         for (auto &procHeap : stepProcReady) {
             std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>{});    // min heap
@@ -295,6 +304,10 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
                 continueSuperstepAttemps = false;
             }
 
+            if (std::distance(currentlyReady.cbegin(), currentlyReadyIter) > maxCurrentlyReadyUsage) {
+                continueSuperstepAttemps = false;
+            }
+
             if (totalAssigned + newTotalAssigned == numVertices) {
                 continueSuperstepAttemps = false;
             }

From 36f9f9da661a23f9a246bc93a086d45d8c3bbc77 Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Tue, 10 Feb 2026 15:24:52 +0100
Subject: [PATCH 11/13] improved lower bound on allready usage

---
 .../GreedySchedulers/GrowLocalMaxBsp.hpp      | 32 ++++++++++++++-----
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index 4625d2cd..9f49985c 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -34,7 +34,7 @@ namespace osp {
 
 template <typename VertT, typename WeightT>
 struct GrowLocalSSPParams {
-    VertT minSuperstepSize_ = 20;
+    VertT minSuperstepSize_ = 10;    // times desiredParallelism, floors maxCurrentlyReadyUsage in ComputeSchedule
     WeightT syncCostMultiplierMinSuperstepWeight_ = 1;
     WeightT syncCostMultiplierParallelCheck_ = 4;
 };
@@ -50,6 +50,9 @@ class GrowLocalSSP : public MaxBspScheduler<GraphT> {
     static constexpr unsigned staleness{2U};
     GrowLocalSSPParams<VertexIdxT<GraphT>, VWorkwT<GraphT>> params_;
 
+    typename std::deque<VertexType>::difference_type maxAllReadyUsage(const std::deque<VertexType> &currentlyReady,
+                                                                      const std::deque<VertexType> &nextSuperstepReady) const;
+
   public:
     ReturnStatus ComputeSchedule(BspSchedule<GraphT> &schedule) override;
     ReturnStatus ComputeSchedule(MaxBspSchedule<GraphT> &schedule) override;
@@ -57,6 +60,23 @@ class GrowLocalSSP : public MaxBspScheduler<GraphT> {
     std::string GetScheduleName() const override { return "GrowLocalSSP"; }
 };
 
+// Usage limit derived from the ready queues: the full size of currentlyReady when staleness is 1,
+// otherwise twice the rounded-up third of the combined size of currentlyReady and nextSuperstepReady.
+template <typename GraphT>
+typename std::deque<VertexIdxT<GraphT>>::difference_type GrowLocalSSP<GraphT>::maxAllReadyUsage(
+    const std::deque<VertexIdxT<GraphT>> &currentlyReady, const std::deque<VertexIdxT<GraphT>> &nextSuperstepReady) const {
+    using DiffT = typename std::deque<VertexType>::difference_type;
+
+    const DiffT numCurrent = std::distance(currentlyReady.cbegin(), currentlyReady.cend());
+    if constexpr (staleness == 1U) {
+        return numCurrent;
+    } else {
+        const DiffT numNext = std::distance(nextSuperstepReady.cbegin(), nextSuperstepReady.cend());
+        const DiffT numThirdsRoundedUp = (numCurrent + numNext + 2) / 3;
+        return numThirdsRoundedUp * 2;
+    }
+}
+
 template <typename GraphT>
 ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(BspSchedule<GraphT> &schedule) {
     MaxBspSchedule<GraphT> tmpSched(schedule.GetInstance());
@@ -131,13 +151,9 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
         std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end());
 
         const typename std::deque<VertexType>::difference_type maxCurrentlyReadyUsage
-            = (staleness == 1U) ? std::distance(currentlyReady.begin(), currentlyReady.end())
-                                : ((std::distance(currentlyReady.begin(), currentlyReady.end())
-                                    + std::distance(futureReady[(superStep + 1U) % staleness].begin(),
-                                                    futureReady[(superStep + 1U) % staleness].end())
-                                    + 2)
-                                   / 3)
-                                      * 2;
+            = std::max(static_cast<typename std::deque<VertexType>::difference_type>(
+                           static_cast<double>(params_.minSuperstepSize_) * desiredParallelism),
+                       maxAllReadyUsage(currentlyReady, futureReady[(superStep + 1U) % staleness]));
 
         std::vector<std::vector<std::pair<VertexType, unsigned>>> &stepProcReady = procReady[reducedSuperStep];
         for (auto &procHeap : stepProcReady) {

From 3a10ac55b3253258123e475f1d7f97f772c68810 Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Wed, 11 Feb 2026 10:15:15 +0100
Subject: [PATCH 12/13] added compare to inplace merge

---
 include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index 9f49985c..d15900f3 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -148,7 +148,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
         const typename std::deque<VertexType>::difference_type lengthCurrentlyReady
             = std::distance(currentlyReady.begin(), currentlyReady.end());
         currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end());
-        std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end());
+        std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end(), std::less<>{});
 
         const typename std::deque<VertexType>::difference_type maxCurrentlyReadyUsage
             = std::max(static_cast<typename std::deque<VertexType>::difference_type>(

From e7acfbf357fece77e8f30d8d1339767e16b77859 Mon Sep 17 00:00:00 2001
From: Raphael Steiner <raphael.steiner@huawei.com>
Date: Wed, 11 Feb 2026 11:38:22 +0100
Subject: [PATCH 13/13] changed to base function call

---
 .../osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp   | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
index d15900f3..8ce849c6 100644
--- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
+++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp
@@ -79,10 +79,7 @@ typename std::deque<VertexIdxT<GraphT>>::difference_type GrowLocalSSP<GraphT>::m
 
 template <typename GraphT>
 ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(BspSchedule<GraphT> &schedule) {
-    MaxBspSchedule<GraphT> tmpSched(schedule.GetInstance());
-    ReturnStatus status = ComputeSchedule(tmpSched);
-    schedule = tmpSched;
-    return status;
+    return MaxBspScheduler<GraphT>::ComputeSchedule(schedule);    // forward to the MaxBspScheduler<GraphT> overload
 }
 
 template <typename GraphT>