From 1ba5870870374969b14957c025d8de37265efe94 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Fri, 6 Feb 2026 15:23:15 +0100 Subject: [PATCH 01/13] progress --- .../GreedySchedulers/GrowLocalMaxBsp.hpp | 217 ++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp new file mode 100644 index 00000000..fae4df84 --- /dev/null +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -0,0 +1,217 @@ +/* +Copyright 2026 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Christos Matzoros, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include +#include +#include +#include + +#include "osp/bsp/scheduler/MaxBspScheduler.hpp" + +namespace osp { + +template +struct GrowLocalSSPParams { + VertT minSuperstepSize_ = 20; + WeightT syncCostMultiplierMinSuperstepWeight_ = 1; + WeightT syncCostMultiplierParallelCheck_ = 4; +}; + +template +class GrowLocalSSP : public MaxBspScheduler { + static_assert(isDirectedGraphV); + static_assert(hasVertexWeightsV); + static_assert(hasVerticesInTopOrderV); + static_assert(hasChildrenInVertexOrderV); + + private: + using VertexType = VertexIdxT; + + constexpr std::size_t staleness{2U}; + GrowLocalSSPParams, VWorkwT> params_; + + public: + ReturnStatus ComputeSchedule(MaxBspSchedule &schedule); + + std::string GetScheduleName() const override { return "GrowLocalSSP"; } +}; + +template +ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &schedule) { + const BspInstance &instance = schedule.GetInstance(); + const GraphT &graph = instance.GetComputationalDag(); + const VertexType numVertices = graph.NumVertices(); + const unsigned numProcs = instance.NumberOfProcessors(); + + std::set currentlyReady; + + std::array, staleness> futureReady; + std::vector bestFutureReady; + + std::array>, staleness> procReady(numProcs); + std::vector> bestProcReady(numProcs); + + std::vector predec(numVertices); + for (const auto vert : graph.Vertices()) { + predec[vert] = graph.InDegree(vert); + if (predec[vert] == 0U) { + currentlyReady.insert(currentlyReady.end(), vert); + } + } + + std::vector> newAssignments(numProcs); + std::vector> bestNewAssignments(numProcs); + + // const VWorkwT minWeightParallelCheck = params_.syncCostMultiplierParallelCheck_ * instance.SynchronisationCosts(); + // const VWorkwT minSuperstepWeight = params_.syncCostMultiplierMinSuperstepWeight_ * instance.SynchronisationCosts(); + // double desiredParallelism = static_cast(numProcs); + + VertexType totalAssigned = 0; + unsigned superStep = 0U; + + while (totalAssigned < numVertices) { + unsigned reducedSuperStep = superStep % staleness; + std::vector> &stepProcReady = procReady[reducedSuperStep]; + std::vector &stepFutureReady = futureReady[reducedSuperStep]; + + VertexType limit = params_.minSuperstepSize_; + double bestScore = 0; + double bestParallelism = 0; + + typename std::set::const_iterator currentlyReadyIter; + typename std::set::const_iterator bestcurrentlyReadyIter; + + bool continueSuperstepAttemps = true; + + while (continueSuperstepAttemps) { + for (unsigned proc = 0; proc < p; proc++) { + newAssignments[proc].clear(); + } + stepFutureReady.clear(); + + currentlyReadyIter = currentlyReady.cbegin(); + + VertexType newTotalAssigned = 0; + VWorkwT weightLimit = 0; + VWorkwT totalWeightAssigned = 0; + + // Processor 0 + constexpr unsigned proc0{0U}; + while (newAssignments[proc0].size() < limit) { + VertexType chosenNode = std::numeric_limits::max(); + { + const auto procReadyIt = stepProcReady[proc0].cbegin(); + if (procReadyIt != stepProcReady[proc0].cend()) { + chosenNode = *procReadyIt; + stepProcReady[proc0].erase(procReadyIt); + } else if (currentlyReadyIter != currentlyReady.cend()) { + chosenNode = *currentlyReadyIter; + ++currentlyReadyIter; + } else { + break; + } + } + + newAssignments[proc0].push_back(chosenNode); + schedule.SetAssignedProcessor(chosenNode, proc0); + ++newTotalAssigned; + weightLimit += graph.VertexWorkWeight(chosenNode); + + for (const VertexType &succ : graph.Children(chosenNode)) { + if (--predec[succ] == 0) { + unsigned earliest = 0U; + bool differentProcParent = false; + for (const VertexType &par : graph.Parents(succ)) { + const bool differentProc = (schedule.AssignedProcessor(par) != proc0); + differentProcParent |= differentProc; + earliest = std::max(earliest, static_cast(differentProc) * schedule.AssignedSuperStep(par)); + } + earliest += static_cast(differentProcParent) * staleness; + + if (earliest <= superStep) { + stepProcReady[proc0].emplace(succ); + } else if (earliest < superStep + staleness) { + procReady[earliest % staleness][proc0].emplace(succ); + } else { + stepFutureReady.emplace_back(succ); + } + } + } + } // end while assigning + + totalWeightAssigned += weightLimit; + + // Processors 1 through P-1 + for (unsigned proc = 1U; proc < numProcs; ++proc) { + VWorkwT currentWeightAssigned = 0; + while (currentWeightAssigned < weightLimit) { + VertexType chosenNode = std::numeric_limits::max(); + { + const auto procReadyIt = stepProcReady[proc].cbegin(); + if (procReadyIt != stepProcReady[proc].cend()) { + chosenNode = *procReadyIt; + stepProcReady[proc].erase(procReadyIt); + } else if (currentlyReadyIter != currentlyReady.cend()) { + chosenNode = *currentlyReadyIter; + ++currentlyReadyIter; + } else { + break; + } + } + + newAssignments[proc].push_back(chosenNode); + schedule.SetAssignedProcessor(chosenNode, proc); + ++newTotalAssigned; + currentWeightAssigned += graph.VertexWorkWeight(chosenNode); + + for (const VertexType &succ : graph.Children(chosenNode)) { + if (--predec[succ] == 0) { + unsigned earliest = 0U; + bool differentProcParent = false; + for (const VertexType &par : graph.Parents(succ)) { + const bool differentProc = (schedule.AssignedProcessor(par) != proc); + differentProcParent |= differentProc; + earliest + = std::max(earliest, static_cast(differentProc) * schedule.AssignedSuperStep(par)); + } + earliest += static_cast(differentProcParent) * staleness; + + if (earliest <= superStep) { + stepProcReady[proc].emplace(succ); + } else if (earliest < superStep + staleness) { + procReady[earliest % staleness][proc].emplace(succ); + } else { + stepFutureReady.emplace_back(succ); + } + } + } + } // end while assigning + weightLimit = std::max(weightLimit, currentWeightAssigned); + totalWeightAssigned += currentWeightAssigned; + } // end processor loops + + bool acceptStep = false; + } + } + + return ReturnStatus::OSP_SUCCESS; +} + +} // end namespace osp From 74a176d4470d0ab2f70d0715236558822fbf251e Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Mon, 9 Feb 2026 16:52:47 +0100 Subject: [PATCH 02/13] progress --- .../GreedySchedulers/GrowLocalMaxBsp.hpp | 203 +++++++++++++++--- 1 file changed, 170 insertions(+), 33 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index fae4df84..e65a000a 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -20,7 +20,12 @@ limitations under the License. #include #include +#include +#include #include +#include +#include +#include #include #include "osp/bsp/scheduler/MaxBspScheduler.hpp" @@ -60,54 +65,89 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched const VertexType numVertices = graph.NumVertices(); const unsigned numProcs = instance.NumberOfProcessors(); - std::set currentlyReady; + std::deque currentlyReady; // vertices ready in current superstep - std::array, staleness> futureReady; - std::vector bestFutureReady; + std::array, staleness> futureReady; + // For i = 1,2,..,staleness, the vertices in futureReady[(superstep + i) % staleness] becomes ready globally in superstep + i + std::deque bestFutureReady; + // vertices to be added to futureReady[superstep % staleness] which become ready globally in superstep + staleness - std::array>, staleness> procReady(numProcs); - std::vector> bestProcReady(numProcs); + std::vector>> currentProcReadyHeaps(numProcs); + std::vector>> bestCurrentProcReadyHeaps(numProcs); + + std::array>>, staleness> procReady; + // For i = 0,1,2,..,staleness-1 and p processor, the vertices in procReady[(superstep + i) % staleness][p] are ready locally + // in superstep + i on processor p + std::array>>, staleness> procReadyAdditions; + std::array>>, staleness> bestProcReadyAdditions; + + for (auto &arrVal : procReady) { + arrVal = std::vector>>(numProcs); + } + for (auto &arrVal : procReadyAdditions) { + arrVal = std::vector>>(numProcs); + } + for (auto &arrVal : bestProcReadyAdditions) { + arrVal = std::vector>>(numProcs); + } std::vector predec(numVertices); for (const auto vert : graph.Vertices()) { predec[vert] = graph.InDegree(vert); if (predec[vert] == 0U) { - currentlyReady.insert(currentlyReady.end(), vert); + currentlyReady.emplace_back(vert); } } std::vector> newAssignments(numProcs); std::vector> bestNewAssignments(numProcs); - // const VWorkwT minWeightParallelCheck = params_.syncCostMultiplierParallelCheck_ * instance.SynchronisationCosts(); - // const VWorkwT minSuperstepWeight = params_.syncCostMultiplierMinSuperstepWeight_ * instance.SynchronisationCosts(); - // double desiredParallelism = static_cast(numProcs); + const VWorkwT minWeightParallelCheck = params_.syncCostMultiplierParallelCheck_ * instance.SynchronisationCosts(); + const VWorkwT minSuperstepWeight = params_.syncCostMultiplierMinSuperstepWeight_ * instance.SynchronisationCosts(); + + double desiredParallelism = static_cast(numProcs); VertexType totalAssigned = 0; unsigned superStep = 0U; while (totalAssigned < numVertices) { - unsigned reducedSuperStep = superStep % staleness; - std::vector> &stepProcReady = procReady[reducedSuperStep]; - std::vector &stepFutureReady = futureReady[reducedSuperStep]; + const unsigned reducedSuperStep = superStep % staleness; + + std::deque &stepFutureReady = futureReady[reducedSuperStep]; + std::sort(stepFutureReady); + const std::size_t lengthCurrentlyReady = currentlyReady.size(); + currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end()); + std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end()); + + std::vector> &stepProcReady = procReady[reducedSuperStep]; + for (auto &procHeap : stepProcReady) { + std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>); // min heap + } VertexType limit = params_.minSuperstepSize_; - double bestScore = 0; - double bestParallelism = 0; + double bestScore = 0.0; + double bestParallelism = 0.0; - typename std::set::const_iterator currentlyReadyIter; - typename std::set::const_iterator bestcurrentlyReadyIter; + typename std::deque::const_iterator currentlyReadyIter; + typename std::deque::const_iterator bestcurrentlyReadyIter; bool continueSuperstepAttemps = true; while (continueSuperstepAttemps) { - for (unsigned proc = 0; proc < p; proc++) { - newAssignments[proc].clear(); + for (auto &procAssignments : newAssignments) { + procAssignments.clear(); } stepFutureReady.clear(); + currentProcReadyHeaps = stepProcReady; currentlyReadyIter = currentlyReady.cbegin(); + for (auto &stepProcReadyAdditions : procReadyAdditions) { + for (auto &localStepProcReadyAdditions : stepProcReadyAdditions) { + localStepProcReadyAdditions.clear(); + } + } + VertexType newTotalAssigned = 0; VWorkwT weightLimit = 0; VWorkwT totalWeightAssigned = 0; @@ -115,12 +155,13 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched // Processor 0 constexpr unsigned proc0{0U}; while (newAssignments[proc0].size() < limit) { + std::vector> &proc0Heap = currentProcReadyHeaps[proc0]; VertexType chosenNode = std::numeric_limits::max(); { - const auto procReadyIt = stepProcReady[proc0].cbegin(); - if (procReadyIt != stepProcReady[proc0].cend()) { - chosenNode = *procReadyIt; - stepProcReady[proc0].erase(procReadyIt); + if (proc0Heap.size() != 0U) { + std::pop_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>); + chosenNode = proc0Heap.back().first; + proc0Heap.pop_back(); } else if (currentlyReadyIter != currentlyReady.cend()) { chosenNode = *currentlyReadyIter; ++currentlyReadyIter; @@ -131,6 +172,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched newAssignments[proc0].push_back(chosenNode); schedule.SetAssignedProcessor(chosenNode, proc0); + schedule.SetAssignedSuperstepNoUpdateNumSuperstep(chosenNode, superStep); ++newTotalAssigned; weightLimit += graph.VertexWorkWeight(chosenNode); @@ -146,15 +188,16 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched earliest += static_cast(differentProcParent) * staleness; if (earliest <= superStep) { - stepProcReady[proc0].emplace(succ); + proc0Heap.emplace_back(succ, superStep + staleness); + std::push_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>); } else if (earliest < superStep + staleness) { - procReady[earliest % staleness][proc0].emplace(succ); + procReadyAdditions[earliest % staleness][proc0].emplace(succ, superStep + staleness); } else { stepFutureReady.emplace_back(succ); } } } - } // end while assigning + } // end while assigning totalWeightAssigned += weightLimit; @@ -162,12 +205,13 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched for (unsigned proc = 1U; proc < numProcs; ++proc) { VWorkwT currentWeightAssigned = 0; while (currentWeightAssigned < weightLimit) { + std::vector> &procHeap = currentProcReadyHeaps[proc]; VertexType chosenNode = std::numeric_limits::max(); { - const auto procReadyIt = stepProcReady[proc].cbegin(); - if (procReadyIt != stepProcReady[proc].cend()) { - chosenNode = *procReadyIt; - stepProcReady[proc].erase(procReadyIt); + if (procHeap.size() != 0U) { + std::pop_heap(procHeap.begin(), procHeap.end(), std::greater<>); + chosenNode = procHeap.back().first; + procHeap.pop_back(); } else if (currentlyReadyIter != currentlyReady.cend()) { chosenNode = *currentlyReadyIter; ++currentlyReadyIter; @@ -178,6 +222,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched newAssignments[proc].push_back(chosenNode); schedule.SetAssignedProcessor(chosenNode, proc); + schedule.SetAssignedSuperstepNoUpdateNumSuperstep(chosenNode, superStep); ++newTotalAssigned; currentWeightAssigned += graph.VertexWorkWeight(chosenNode); @@ -194,21 +239,113 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched earliest += static_cast(differentProcParent) * staleness; if (earliest <= superStep) { - stepProcReady[proc].emplace(succ); + procHeap.emplace_back(succ, superStep + staleness); + std::push_heap(procHeap.begin(), procHeap.end(), std::greater<>); } else if (earliest < superStep + staleness) { - procReady[earliest % staleness][proc].emplace(succ); + procReadyAdditions[earliest % staleness][proc].emplace(succ, superStep + staleness); } else { stepFutureReady.emplace_back(succ); } } } - } // end while assigning + } // end while assigning weightLimit = std::max(weightLimit, currentWeightAssigned); totalWeightAssigned += currentWeightAssigned; - } // end processor loops + } // end processor loops bool acceptStep = false; + + double score + = static_cast(totalWeightAssigned) / static_cast(weightLimit + instance.SynchronisationCosts()); + double parallelism = 0; + if (weightLimit > 0) { + parallelism = static_cast(totalWeightAssigned) / static_cast(weightLimit); + } + + if (score > 0.99 * bestScore) { // It is possible to make this less strict, i.e. score > 0.98 * best_score. + // The purpose of this would be to encourage larger supersteps. + bestScore = std::max(bestScore, score); + bestParallelism = parallelism; + acceptStep = true; + } else { + continueSuperstepAttemps = false; + } + + if (weightLimit >= minWeightParallelCheck) { + if (parallelism < std::max(2.0, 0.8 * desiredParallelism)) { + continueSuperstepAttemps = false; + } + } + + if (weightLimit <= minSuperstepWeight) { + continueSuperstepAttemps = true; + if (totalAssigned + newTotalAssigned == n) { + acceptStep = true; + continueSuperstepAttemps = false; + } + } + + if (totalAssigned + newTotalAssigned == n) { + continueSuperstepAttemps = false; + } + + // Undo predec decreases + for (const auto &newLocalAssignments : newAssignments) { + for (const VertexType &node : newLocalAssignments) { + for (const VertexType &succ : graph.Children(node)) { + ++predec[succ]; + } + } + } + + if (acceptStep) { + std::swap(bestFutureReady, stepFutureReady); + std::swap(bestProcReadyAdditions, procReadyAdditions); + std::swap(bestcurrentlyReadyIter, currentlyReadyIter); + std::swap(bestNewAssignments, newAssignments); + std::swap(bestCurrentProcReadyHeaps, currentProcReadyHeaps); + } + + limit++; + limit += (limit / 2); + } + + // apply best iteration + currentlyReady.erase(currentlyReady.begin(), bestcurrentlyReadyIter); + std::swap(futureReady[reducedSuperStep], bestFutureReady); + + ++superStep; + for (unsigned proc = 0U; proc < numProcs; ++proc) { + for (const auto &vertStepPair : bestCurrentProcReadyHeaps[proc]) { + if (vertStepPair.second <= superStep) { + futureReady[superStep % staleness].emplace_back(vertStepPair.first); + } else { + procReady[superStep % staleness][proc].emplace_back(vertStepPair); + } + } + } + + for (std::size_t stepInd = 0U; stepInd < staleness; ++stepInd) { + for (unsigned proc = 0U; proc < numProcs; ++proc) { + procReady[stepInd][proc].insert(procReady[stepInd][proc].end(), + bestProcReadyAdditions[stepInd][proc].begin(), + bestProcReadyAdditions[stepInd][proc].end()); + } } + + for (unsigned proc = 0U; proc < numProcs; ++proc) { + totalAssigned += bestNewAssignments[proc].size(); + for (const VertexType &node : bestNewAssignments[proc]) { + schedule.SetAssignedProcessor(node, proc); + + for (const VertexType &succ : graph.Children(node)) { + --predec[succ]; + } + } + } + + desiredParallelism = (0.3 * desiredParallelism) + (0.6 * bestParallelism) + + (0.1 * static_cast(numProcs)); // weights should sum up to one } return ReturnStatus::OSP_SUCCESS; From 6c0d93bca9e99ebb904b7db71a56882a9883ef78 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Mon, 9 Feb 2026 17:26:30 +0100 Subject: [PATCH 03/13] progress --- .../GreedySchedulers/GrowLocalMaxBsp.hpp | 33 ++++++++++++------- tests/max_bsp_schedulers.cpp | 24 +++++++++++--- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index e65a000a..3ee726a1 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -49,15 +49,24 @@ class GrowLocalSSP : public MaxBspScheduler { private: using VertexType = VertexIdxT; - constexpr std::size_t staleness{2U}; + static constexpr std::size_t staleness{2U}; GrowLocalSSPParams, VWorkwT> params_; public: - ReturnStatus ComputeSchedule(MaxBspSchedule &schedule); + ReturnStatus ComputeSchedule(BspSchedule &schedule) override; + ReturnStatus ComputeSchedule(MaxBspSchedule &schedule) override; std::string GetScheduleName() const override { return "GrowLocalSSP"; } }; +template +ReturnStatus GrowLocalSSP::ComputeSchedule(BspSchedule &schedule) { + MaxBspSchedule tmpSched(schedule.GetInstance()); + ReturnStatus status = ComputeSchedule(tmpSched); + schedule = tmpSched; + return status; +} + template ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &schedule) { const BspInstance &instance = schedule.GetInstance(); @@ -121,7 +130,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched std::vector> &stepProcReady = procReady[reducedSuperStep]; for (auto &procHeap : stepProcReady) { - std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>); // min heap + std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>{}); // min heap } VertexType limit = params_.minSuperstepSize_; @@ -155,11 +164,11 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched // Processor 0 constexpr unsigned proc0{0U}; while (newAssignments[proc0].size() < limit) { - std::vector> &proc0Heap = currentProcReadyHeaps[proc0]; + std::vector> &proc0Heap = currentProcReadyHeaps[proc0]; VertexType chosenNode = std::numeric_limits::max(); { if (proc0Heap.size() != 0U) { - std::pop_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>); + std::pop_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>{}); chosenNode = proc0Heap.back().first; proc0Heap.pop_back(); } else if (currentlyReadyIter != currentlyReady.cend()) { @@ -189,7 +198,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched if (earliest <= superStep) { proc0Heap.emplace_back(succ, superStep + staleness); - std::push_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>); + std::push_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>{}); } else if (earliest < superStep + staleness) { procReadyAdditions[earliest % staleness][proc0].emplace(succ, superStep + staleness); } else { @@ -205,11 +214,11 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched for (unsigned proc = 1U; proc < numProcs; ++proc) { VWorkwT currentWeightAssigned = 0; while (currentWeightAssigned < weightLimit) { - std::vector> &procHeap = currentProcReadyHeaps[proc]; + std::vector> &procHeap = currentProcReadyHeaps[proc]; VertexType chosenNode = std::numeric_limits::max(); { if (procHeap.size() != 0U) { - std::pop_heap(procHeap.begin(), procHeap.end(), std::greater<>); + std::pop_heap(procHeap.begin(), procHeap.end(), std::greater<>{}); chosenNode = procHeap.back().first; procHeap.pop_back(); } else if (currentlyReadyIter != currentlyReady.cend()) { @@ -240,7 +249,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched if (earliest <= superStep) { procHeap.emplace_back(succ, superStep + staleness); - std::push_heap(procHeap.begin(), procHeap.end(), std::greater<>); + std::push_heap(procHeap.begin(), procHeap.end(), std::greater<>{}); } else if (earliest < superStep + staleness) { procReadyAdditions[earliest % staleness][proc].emplace(succ, superStep + staleness); } else { @@ -279,13 +288,13 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched if (weightLimit <= minSuperstepWeight) { continueSuperstepAttemps = true; - if (totalAssigned + newTotalAssigned == n) { + if (totalAssigned + newTotalAssigned == numVertices) { acceptStep = true; continueSuperstepAttemps = false; } } - if (totalAssigned + newTotalAssigned == n) { + if (totalAssigned + newTotalAssigned == numVertices) { continueSuperstepAttemps = false; } @@ -348,6 +357,8 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched + (0.1 * static_cast(numProcs)); // weights should sum up to one } + schedule.SetNumberOfSupersteps(superStep); + return ReturnStatus::OSP_SUCCESS; } diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp index 908b0493..6e33487c 100644 --- a/tests/max_bsp_schedulers.cpp +++ b/tests/max_bsp_schedulers.cpp @@ -27,7 +27,9 @@ limitations under the License. #include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp" #include "osp/bsp/scheduler/MaxBspScheduler.hpp" +#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "test_graphs.hpp" @@ -61,17 +63,19 @@ void RunTest(Scheduler *testScheduler) { std::cout << "Graph: " << nameGraph << std::endl; std::cout << "Architecture: " << nameMachine << std::endl; - BspInstance instance; + ComputationalDagEdgeIdxVectorImplDefIntT graph; + BspArchitecture arch; - bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag()); - bool statusArchitecture - = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), graph); + bool statusArchitecture = file_reader::ReadBspArchitecture((cwd / filenameMachine).string(), arch); if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } + BspInstance instance(graph, arch); + BspSchedule schedule(instance); const auto result = testScheduler->ComputeSchedule(schedule); @@ -142,3 +146,15 @@ BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerMaxBspScheduleLargeTest) { GreedyVarianceSspScheduler test; RunTestMaxBsp(&test); } + +// Tests ComputeSchedule(BspSchedule&) → staleness = 1 +BOOST_AUTO_TEST_CASE(GrowLocalSSPBspScheduleLargeTest) { + GrowLocalSSP> test; + RunTest(&test); +} + +// Tests ComputeSchedule(MaxBspSchedule&) → staleness = 2 +BOOST_AUTO_TEST_CASE(GrowLocalSSPMaxBspScheduleLargeTest) { + GrowLocalSSP> test; + RunTestMaxBsp(&test); +} \ No newline at end of file From 9204a06cd75220660c15ae036648240d883bcd30 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Mon, 9 Feb 2026 17:52:54 +0100 Subject: [PATCH 04/13] compiles --- include/osp/bsp/model/BspInstance.hpp | 16 +++++++++++++--- .../GreedySchedulers/GrowLocalMaxBsp.hpp | 16 ++++++++-------- tests/max_bsp_schedulers.cpp | 17 ++++++++++------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index 358947e9..0fcd8040 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -107,10 +107,20 @@ class BspInstance { * @param cdag The computational DAG for the instance. * @param architecture The BSP architecture for the instance. */ - BspInstance(const GraphT &cdag, + template + BspInstance(const OtherGraphT &cdag, const BspArchitecture &architecture, std::vector> nodeProcessorCompatibility = std::vector>({{true}})) - : cdag_(cdag), architecture_(architecture), nodeProcessorCompatibility_(nodeProcessorCompatibility) {} + : cdag_(cdag), architecture_(architecture), nodeProcessorCompatibility_(nodeProcessorCompatibility) { + static_assert(std::is_same_v, VMemwT>, + "BspArchitecture: GraphT and Graph_t_other have the same memory weight type."); + + static_assert(std::is_same_v, VCommwT>, + "BspArchitecture: GraphT and Graph_t_other have the same communication weight type."); + + static_assert(std::is_same_v, VTypeT>, + "BspArchitecture: GraphT and Graph_t_other have the same processor type."); + } /** * @brief Constructs a BspInstance object with the specified computational DAG and BSP architecture. @@ -319,7 +329,7 @@ class BspInstance { bool HasAnyTypeRestrictions() const { for (VertexTypeTOrDefault node_type = 0; node_type < nodeProcessorCompatibility_.size(); ++node_type) { for (VertexTypeTOrDefault proc_type = 0; proc_type < nodeProcessorCompatibility_[node_type].size(); ++proc_type) { - if(!nodeProcessorCompatibility_[node_type][proc_type]) { + if (!nodeProcessorCompatibility_[node_type][proc_type]) { return true; } } diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index 3ee726a1..0cc191a8 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -49,7 +49,7 @@ class GrowLocalSSP : public MaxBspScheduler { private: using VertexType = VertexIdxT; - static constexpr std::size_t staleness{2U}; + static constexpr unsigned staleness{2U}; GrowLocalSSPParams, VWorkwT> params_; public: @@ -123,12 +123,12 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched const unsigned reducedSuperStep = superStep % staleness; std::deque &stepFutureReady = futureReady[reducedSuperStep]; - std::sort(stepFutureReady); - const std::size_t lengthCurrentlyReady = currentlyReady.size(); + std::sort(stepFutureReady.begin(), stepFutureReady.end(), std::less<>{}); + const typename std::deque::difference_type lengthCurrentlyReady = std::distance(currentlyReady.begin(), currentlyReady.end()); currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end()); std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end()); - std::vector> &stepProcReady = procReady[reducedSuperStep]; + std::vector>> &stepProcReady = procReady[reducedSuperStep]; for (auto &procHeap : stepProcReady) { std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>{}); // min heap } @@ -192,7 +192,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched for (const VertexType &par : graph.Parents(succ)) { const bool differentProc = (schedule.AssignedProcessor(par) != proc0); differentProcParent |= differentProc; - earliest = std::max(earliest, static_cast(differentProc) * schedule.AssignedSuperStep(par)); + earliest = std::max(earliest, static_cast(differentProc) * schedule.AssignedSuperstep(par)); } earliest += static_cast(differentProcParent) * staleness; @@ -200,7 +200,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched proc0Heap.emplace_back(succ, superStep + staleness); std::push_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>{}); } else if (earliest < superStep + staleness) { - procReadyAdditions[earliest % staleness][proc0].emplace(succ, superStep + staleness); + procReadyAdditions[earliest % staleness][proc0].emplace_back(succ, superStep + staleness); } else { stepFutureReady.emplace_back(succ); } @@ -243,7 +243,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched const bool differentProc = (schedule.AssignedProcessor(par) != proc); differentProcParent |= differentProc; earliest - = std::max(earliest, static_cast(differentProc) * schedule.AssignedSuperStep(par)); + = std::max(earliest, static_cast(differentProc) * schedule.AssignedSuperstep(par)); } earliest += static_cast(differentProcParent) * staleness; @@ -251,7 +251,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched procHeap.emplace_back(succ, superStep + staleness); std::push_heap(procHeap.begin(), procHeap.end(), std::greater<>{}); } else if (earliest < superStep + staleness) { - procReadyAdditions[earliest % staleness][proc].emplace(succ, superStep + staleness); + procReadyAdditions[earliest % staleness][proc].emplace_back(succ, superStep + staleness); } else { stepFutureReady.emplace_back(succ); } diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp index 6e33487c..60f2b55d 100644 --- a/tests/max_bsp_schedulers.cpp +++ b/tests/max_bsp_schedulers.cpp @@ -36,6 +36,9 @@ limitations under the License. using namespace osp; +using VImpl1 = CDagVertexImpl; +using VImpl2 = CDagVertexImpl; + std::vector TestArchitectures() { return {"data/machine_params/p3.arch"}; } template @@ -63,7 +66,7 @@ void RunTest(Scheduler *testScheduler) { std::cout << "Graph: " << nameGraph << std::endl; std::cout << "Architecture: " << nameMachine << std::endl; - ComputationalDagEdgeIdxVectorImplDefIntT graph; + ComputationalDagVectorImpl graph; BspArchitecture arch; bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), graph); @@ -108,7 +111,7 @@ void RunTestMaxBsp(MaxBspScheduler *testScheduler) { << "Graph: " << nameGraph << std::endl << "Architecture: " << nameMachine << std::endl; - ComputationalDagEdgeIdxVectorImplDefIntT graph; + ComputationalDagVectorImpl graph; BspArchitecture arch; bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), graph); @@ -131,30 +134,30 @@ void RunTestMaxBsp(MaxBspScheduler *testScheduler) { // Tests ComputeSchedule(BspSchedule&) → staleness = 1 BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerTestVectorImpl) { - GreedyVarianceSspScheduler test; + GreedyVarianceSspScheduler> test; RunTest(&test); } // Tests ComputeSchedule(BspSchedule&) → staleness = 1 (different graph impl) BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerTestEdgeIdxImpl) { - GreedyVarianceSspScheduler test; + GreedyVarianceSspScheduler> test; RunTest(&test); } // Tests ComputeSchedule(MaxBspSchedule&) → staleness = 2 BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerMaxBspScheduleLargeTest) { - GreedyVarianceSspScheduler test; + GreedyVarianceSspScheduler> test; RunTestMaxBsp(&test); } // Tests ComputeSchedule(BspSchedule&) → staleness = 1 BOOST_AUTO_TEST_CASE(GrowLocalSSPBspScheduleLargeTest) { - GrowLocalSSP> test; + GrowLocalSSP> test; RunTest(&test); } // Tests ComputeSchedule(MaxBspSchedule&) → staleness = 2 BOOST_AUTO_TEST_CASE(GrowLocalSSPMaxBspScheduleLargeTest) { - GrowLocalSSP> test; + GrowLocalSSP> test; RunTestMaxBsp(&test); } \ No newline at end of file From 26f83efa75ece9f97560a46ac3ab2921df4f7d50 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Tue, 10 Feb 2026 08:53:45 +0100 Subject: [PATCH 05/13] some fixes --- .../GreedySchedulers/GrowLocalMaxBsp.hpp | 11 +++++--- tests/max_bsp_schedulers.cpp | 25 ++++++++++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index 0cc191a8..6c58f79d 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -124,7 +124,8 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched std::deque &stepFutureReady = futureReady[reducedSuperStep]; std::sort(stepFutureReady.begin(), stepFutureReady.end(), std::less<>{}); - const typename std::deque::difference_type lengthCurrentlyReady = std::distance(currentlyReady.begin(), currentlyReady.end()); + const typename std::deque::difference_type lengthCurrentlyReady + = std::distance(currentlyReady.begin(), currentlyReady.end()); currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end()); std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end()); @@ -134,7 +135,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched } VertexType limit = params_.minSuperstepSize_; - double bestScore = 0.0; + double bestScore = std::numeric_limits::lowest(); double bestParallelism = 0.0; typename std::deque::const_iterator currentlyReadyIter; @@ -266,7 +267,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched double score = static_cast(totalWeightAssigned) / static_cast(weightLimit + instance.SynchronisationCosts()); - double parallelism = 0; + double parallelism = 0.0; if (weightLimit > 0) { parallelism = static_cast(totalWeightAssigned) / static_cast(weightLimit); } @@ -294,6 +295,10 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched } } + if (currentlyReadyIter == currentlyReady.cend()) { + continueSuperstepAttemps = false; + } + if (totalAssigned + newTotalAssigned == numVertices) { continueSuperstepAttemps = false; } diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp index 60f2b55d..e8a9721c 100644 --- a/tests/max_bsp_schedulers.cpp +++ b/tests/max_bsp_schedulers.cpp @@ -41,6 +41,23 @@ using VImpl2 = CDagVertexImpl; std::vector TestArchitectures() { return {"data/machine_params/p3.arch"}; } +template +void checkPrecedenceContraints(const BspSchedule &schedule, const unsigned staleness) { + for (const auto &v : schedule.GetInstance().GetComputationalDag().Vertices()) { + BOOST_CHECK_LT(schedule.AssignedSuperstep(v), schedule.NumberOfSupersteps()); + + for (const auto &chld : schedule.GetInstance().GetComputationalDag().Children(v)) { + const unsigned differentProcessors + = (schedule.AssignedProcessor(v) == schedule.AssignedProcessor(chld)) ? 0U : staleness; + + BOOST_CHECK_LE(schedule.AssignedSuperstep(v) + differentProcessors, schedule.AssignedSuperstep(chld)); + if (schedule.AssignedSuperstep(v) + differentProcessors > schedule.AssignedSuperstep(chld)) { + std::cout << "Vertex: " << v << " Child: " << chld << '\n'; + } + } + } +} + template void RunTest(Scheduler *testScheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); @@ -84,6 +101,7 @@ void RunTest(Scheduler *testScheduler) { BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + checkPrecedenceContraints(schedule, 1U); } } } @@ -128,6 +146,7 @@ void RunTestMaxBsp(MaxBspScheduler *testScheduler) { BOOST_CHECK_EQUAL(result, ReturnStatus::OSP_SUCCESS); BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + checkPrecedenceContraints(schedule, 2U); } } } @@ -152,12 +171,12 @@ BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerMaxBspScheduleLargeTest) { // Tests ComputeSchedule(BspSchedule&) → staleness = 1 BOOST_AUTO_TEST_CASE(GrowLocalSSPBspScheduleLargeTest) { - GrowLocalSSP> test; + GrowLocalSSP> test; RunTest(&test); } // Tests ComputeSchedule(MaxBspSchedule&) → staleness = 2 BOOST_AUTO_TEST_CASE(GrowLocalSSPMaxBspScheduleLargeTest) { - GrowLocalSSP> test; + GrowLocalSSP> test; RunTestMaxBsp(&test); -} \ No newline at end of file +} From 69f30c76d78671a41de8eb931b2707d33a00fa04 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Tue, 10 Feb 2026 10:48:48 +0100 Subject: [PATCH 06/13] progress --- .../GreedySchedulers/GrowLocalMaxBsp.hpp | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index 6c58f79d..cf979846 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -188,14 +188,12 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched for (const VertexType &succ : graph.Children(chosenNode)) { if (--predec[succ] == 0) { - unsigned earliest = 0U; - bool differentProcParent = false; + unsigned earliest = superStep; for (const VertexType &par : graph.Parents(succ)) { - const bool differentProc = (schedule.AssignedProcessor(par) != proc0); - differentProcParent |= differentProc; - earliest = std::max(earliest, static_cast(differentProc) * schedule.AssignedSuperstep(par)); + const bool sameProc = (schedule.AssignedProcessor(par) == proc0); + const unsigned constraint = sameProc ? superStep : schedule.AssignedSuperstep(par) + staleness; + earliest = std::max(earliest, constraint); } - earliest += static_cast(differentProcParent) * staleness; if (earliest <= superStep) { proc0Heap.emplace_back(succ, superStep + staleness); @@ -238,15 +236,12 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched for (const VertexType &succ : graph.Children(chosenNode)) { if (--predec[succ] == 0) { - unsigned earliest = 0U; - bool differentProcParent = false; + unsigned earliest = superStep; for (const VertexType &par : graph.Parents(succ)) { - const bool differentProc = (schedule.AssignedProcessor(par) != proc); - differentProcParent |= differentProc; - earliest - = std::max(earliest, static_cast(differentProc) * schedule.AssignedSuperstep(par)); + const bool sameProc = (schedule.AssignedProcessor(par) == proc); + const unsigned constraint = sameProc ? superStep : schedule.AssignedSuperstep(par) + staleness; + earliest = std::max(earliest, constraint); } - earliest += static_cast(differentProcParent) * staleness; if (earliest <= superStep) { procHeap.emplace_back(succ, superStep + staleness); @@ -328,13 +323,13 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched currentlyReady.erase(currentlyReady.begin(), bestcurrentlyReadyIter); std::swap(futureReady[reducedSuperStep], bestFutureReady); - ++superStep; + const unsigned nextSuperStep = superStep + 1U; for (unsigned proc = 0U; proc < numProcs; ++proc) { for (const auto &vertStepPair : bestCurrentProcReadyHeaps[proc]) { - if (vertStepPair.second <= superStep) { - futureReady[superStep % staleness].emplace_back(vertStepPair.first); + if (vertStepPair.second <= nextSuperStep) { + futureReady[nextSuperStep % staleness].emplace_back(vertStepPair.first); } else { - procReady[superStep % staleness][proc].emplace_back(vertStepPair); + procReady[nextSuperStep % staleness][proc].emplace_back(vertStepPair); } } } @@ -351,6 +346,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched totalAssigned += bestNewAssignments[proc].size(); for (const VertexType &node : bestNewAssignments[proc]) { schedule.SetAssignedProcessor(node, proc); + // schedule.SetAssignedSuperstepNoUpdateNumSuperstep(node, superStep); for (const VertexType &succ : graph.Children(node)) { --predec[succ]; @@ -358,6 +354,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched } } + ++superStep; desiredParallelism = (0.3 * desiredParallelism) + (0.6 * bestParallelism) + (0.1 * static_cast(numProcs)); // weights should sum up to one } From 519e75d2916a4ba2c4e6a3317fbe3e334cc3f6ac Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Tue, 10 Feb 2026 10:52:39 +0100 Subject: [PATCH 07/13] fixed bug --- .../scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp | 4 ++++ tests/max_bsp_schedulers.cpp | 11 ++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index cf979846..808597f7 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -323,6 +323,10 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched currentlyReady.erase(currentlyReady.begin(), bestcurrentlyReadyIter); std::swap(futureReady[reducedSuperStep], bestFutureReady); + for (auto &localProcReady : procReady[reducedSuperStep]) { + localProcReady.clear(); + } + const unsigned nextSuperStep = superStep + 1U; for (unsigned proc = 0U; proc < numProcs; ++proc) { for (const auto &vertStepPair : bestCurrentProcReadyHeaps[proc]) { diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp index e8a9721c..46a53511 100644 --- a/tests/max_bsp_schedulers.cpp +++ b/tests/max_bsp_schedulers.cpp @@ -47,12 +47,13 @@ void checkPrecedenceContraints(const BspSchedule &schedule, const unsign BOOST_CHECK_LT(schedule.AssignedSuperstep(v), schedule.NumberOfSupersteps()); for (const auto &chld : schedule.GetInstance().GetComputationalDag().Children(v)) { - const unsigned differentProcessors - = (schedule.AssignedProcessor(v) == schedule.AssignedProcessor(chld)) ? 0U : staleness; + const unsigned sameProcessors = (schedule.AssignedProcessor(v) == schedule.AssignedProcessor(chld)) ? 0U : staleness; - BOOST_CHECK_LE(schedule.AssignedSuperstep(v) + differentProcessors, schedule.AssignedSuperstep(chld)); - if (schedule.AssignedSuperstep(v) + differentProcessors > schedule.AssignedSuperstep(chld)) { - std::cout << "Vertex: " << v << " Child: " << chld << '\n'; + BOOST_CHECK_LE(schedule.AssignedSuperstep(v) + sameProcessors, schedule.AssignedSuperstep(chld)); + if (schedule.AssignedSuperstep(v) + sameProcessors > schedule.AssignedSuperstep(chld)) { + std::cout << "Vertex: " << v << " (S:" << schedule.AssignedSuperstep(v) << " P:" << schedule.AssignedProcessor(v) + << ")" << " Child: " << chld << " (S:" << schedule.AssignedSuperstep(chld) + << " P:" << schedule.AssignedProcessor(chld) << ")" << '\n'; } } } From 7e445bef75aaedc4eba7430847afd839a2b3e757 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Tue, 10 Feb 2026 11:13:59 +0100 Subject: [PATCH 08/13] removed commented code --- include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index 808597f7..5f37937e 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -350,7 +350,6 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched totalAssigned += bestNewAssignments[proc].size(); for (const VertexType &node : bestNewAssignments[proc]) { schedule.SetAssignedProcessor(node, proc); - // schedule.SetAssignedSuperstepNoUpdateNumSuperstep(node, superStep); for (const VertexType &succ : graph.Children(node)) { --predec[succ]; From 9215295a5c2975fb179bd39bbaa849c2418f01da Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Tue, 10 Feb 2026 11:36:38 +0100 Subject: [PATCH 09/13] removed restrictions --- .../osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index 5f37937e..05bcd53b 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -43,8 +43,6 @@ template class GrowLocalSSP : public MaxBspScheduler { static_assert(isDirectedGraphV); static_assert(hasVertexWeightsV); - static_assert(hasVerticesInTopOrderV); - static_assert(hasChildrenInVertexOrderV); private: using VertexType = VertexIdxT; @@ -107,6 +105,9 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched currentlyReady.emplace_back(vert); } } + if constexpr (not hasVerticesInTopOrderV) { + std::sort(currentlyReady.begin(), currentlyReady.end(), std::less<>{}); + } std::vector> newAssignments(numProcs); std::vector> bestNewAssignments(numProcs); From 63f9a7dea670251dc5a0e6d524926be2733afbea Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Tue, 10 Feb 2026 14:55:03 +0100 Subject: [PATCH 10/13] conditioning for next superstep --- .../scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index 05bcd53b..4625d2cd 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -130,6 +130,15 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end()); std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end()); + const typename std::deque::difference_type maxCurrentlyReadyUsage + = (staleness == 1U) ? std::distance(currentlyReady.begin(), currentlyReady.end()) + : ((std::distance(currentlyReady.begin(), currentlyReady.end()) + + std::distance(futureReady[(superStep + 1U) % staleness].begin(), + futureReady[(superStep + 1U) % staleness].end()) + + 2) + / 3) + * 2; + std::vector>> &stepProcReady = procReady[reducedSuperStep]; for (auto &procHeap : stepProcReady) { std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>{}); // min heap @@ -295,6 +304,10 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched continueSuperstepAttemps = false; } + if (std::distance(currentlyReady.cbegin(), currentlyReadyIter) > maxCurrentlyReadyUsage) { + continueSuperstepAttemps = false; + } + if (totalAssigned + newTotalAssigned == numVertices) { continueSuperstepAttemps = false; } From 36f9f9da661a23f9a246bc93a086d45d8c3bbc77 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Tue, 10 Feb 2026 15:24:52 +0100 Subject: [PATCH 11/13] improved lower bound on allready usage --- .../GreedySchedulers/GrowLocalMaxBsp.hpp | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index 4625d2cd..9f49985c 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -34,7 +34,7 @@ namespace osp { template struct GrowLocalSSPParams { - VertT minSuperstepSize_ = 20; + VertT minSuperstepSize_ = 10; WeightT syncCostMultiplierMinSuperstepWeight_ = 1; WeightT syncCostMultiplierParallelCheck_ = 4; }; @@ -50,6 +50,9 @@ class GrowLocalSSP : public MaxBspScheduler { static constexpr unsigned staleness{2U}; GrowLocalSSPParams, VWorkwT> params_; + typename std::deque::difference_type maxAllReadyUsage(const std::deque ¤tlyReady, + const std::deque &nextSuperstepReady) const; + public: ReturnStatus ComputeSchedule(BspSchedule &schedule) override; ReturnStatus ComputeSchedule(MaxBspSchedule &schedule) override; @@ -57,6 +60,23 @@ class GrowLocalSSP : public MaxBspScheduler { std::string GetScheduleName() const override { return "GrowLocalSSP"; } }; +template +typename std::deque>::difference_type GrowLocalSSP::maxAllReadyUsage( + const std::deque> ¤tlyReady, const std::deque> &nextSuperstepReady) const { + if constexpr (staleness == 1U) { + return std::distance(currentlyReady.cbegin(), currentlyReady.cend()); + } else { + typename std::deque::difference_type lengthCurrently + = std::distance(currentlyReady.cbegin(), currentlyReady.cend()); + typename std::deque::difference_type lengthNext + = std::distance(nextSuperstepReady.cbegin(), nextSuperstepReady.cend()); + + typename std::deque::difference_type ans = ((lengthCurrently + lengthNext + 2) / 3) * 2; + + return ans; + } +} + template ReturnStatus GrowLocalSSP::ComputeSchedule(BspSchedule &schedule) { MaxBspSchedule tmpSched(schedule.GetInstance()); @@ -131,13 +151,9 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end()); const typename std::deque::difference_type maxCurrentlyReadyUsage - = (staleness == 1U) ? std::distance(currentlyReady.begin(), currentlyReady.end()) - : ((std::distance(currentlyReady.begin(), currentlyReady.end()) - + std::distance(futureReady[(superStep + 1U) % staleness].begin(), - futureReady[(superStep + 1U) % staleness].end()) - + 2) - / 3) - * 2; + = std::max(static_cast::difference_type>( + static_cast(params_.minSuperstepSize_) * desiredParallelism), + maxAllReadyUsage(currentlyReady, futureReady[(superStep + 1U) % staleness])); std::vector>> &stepProcReady = procReady[reducedSuperStep]; for (auto &procHeap : stepProcReady) { From 3a10ac55b3253258123e475f1d7f97f772c68810 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Wed, 11 Feb 2026 10:15:15 +0100 Subject: [PATCH 12/13] added compare to inplace merge --- include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index 9f49985c..d15900f3 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -148,7 +148,7 @@ ReturnStatus GrowLocalSSP::ComputeSchedule(MaxBspSchedule &sched const typename std::deque::difference_type lengthCurrentlyReady = std::distance(currentlyReady.begin(), currentlyReady.end()); currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end()); - std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end()); + std::inplace_merge(currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end(), std::less<>{}); const typename std::deque::difference_type maxCurrentlyReadyUsage = std::max(static_cast::difference_type>( From e7acfbf357fece77e8f30d8d1339767e16b77859 Mon Sep 17 00:00:00 2001 From: Raphael Steiner Date: Wed, 11 Feb 2026 11:38:22 +0100 Subject: [PATCH 13/13] changed to base function call --- .../osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp index d15900f3..8ce849c6 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp @@ -79,10 +79,7 @@ typename std::deque>::difference_type GrowLocalSSP::m template ReturnStatus GrowLocalSSP::ComputeSchedule(BspSchedule &schedule) { - MaxBspSchedule tmpSched(schedule.GetInstance()); - ReturnStatus status = ComputeSchedule(tmpSched); - schedule = tmpSched; - return status; + return MaxBspScheduler::ComputeSchedule(schedule); } template