From b9f25d943a092676bd7dc2c33faa298d62c4e60e Mon Sep 17 00:00:00 2001 From: Simon Shillaker Date: Mon, 5 Jul 2021 16:06:03 +0000 Subject: [PATCH 1/4] Only do snapshot pushing if there are registered hosts --- src/scheduler/Scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scheduler/Scheduler.cpp b/src/scheduler/Scheduler.cpp index 79c2a8515..88a66ad78 100644 --- a/src/scheduler/Scheduler.cpp +++ b/src/scheduler/Scheduler.cpp @@ -267,7 +267,7 @@ std::vector Scheduler::callFunctions( "Empty snapshot for distributed threads/ processes"); } - if (snapshotNeeded) { + if (snapshotNeeded && !registeredHosts.empty()) { snapshotData = faabric::snapshot::getSnapshotRegistry().getSnapshot(snapshotKey); snapshotDiffs = snapshotData.getDirtyPages(); From a1378c3fff4aa5aad5056d6471b35b05bbd23499 Mon Sep 17 00:00:00 2001 From: Simon Shillaker Date: Mon, 5 Jul 2021 16:29:51 +0000 Subject: [PATCH 2/4] Move snapshot stuff to snapshot module --- include/faabric/{scheduler => snapshot}/SnapshotApi.h | 0 include/faabric/{scheduler => snapshot}/SnapshotClient.h | 0 include/faabric/{scheduler => snapshot}/SnapshotServer.h | 0 src/scheduler/CMakeLists.txt | 4 +--- src/snapshot/CMakeLists.txt | 4 +++- src/{scheduler => snapshot}/SnapshotClient.cpp | 0 src/{scheduler => snapshot}/SnapshotServer.cpp | 0 7 files changed, 4 insertions(+), 4 deletions(-) rename include/faabric/{scheduler => snapshot}/SnapshotApi.h (100%) rename include/faabric/{scheduler => snapshot}/SnapshotClient.h (100%) rename include/faabric/{scheduler => snapshot}/SnapshotServer.h (100%) rename src/{scheduler => snapshot}/SnapshotClient.cpp (100%) rename src/{scheduler => snapshot}/SnapshotServer.cpp (100%) diff --git a/include/faabric/scheduler/SnapshotApi.h b/include/faabric/snapshot/SnapshotApi.h similarity index 100% rename from include/faabric/scheduler/SnapshotApi.h rename to include/faabric/snapshot/SnapshotApi.h diff --git a/include/faabric/scheduler/SnapshotClient.h b/include/faabric/snapshot/SnapshotClient.h similarity index 100% rename from include/faabric/scheduler/SnapshotClient.h rename to include/faabric/snapshot/SnapshotClient.h diff --git a/include/faabric/scheduler/SnapshotServer.h b/include/faabric/snapshot/SnapshotServer.h similarity index 100% rename from include/faabric/scheduler/SnapshotServer.h rename to include/faabric/snapshot/SnapshotServer.h diff --git a/src/scheduler/CMakeLists.txt b/src/scheduler/CMakeLists.txt index 4e8c271ad..425160e31 100644 --- a/src/scheduler/CMakeLists.txt +++ b/src/scheduler/CMakeLists.txt @@ -7,8 +7,6 @@ set(LIB_FILES FunctionCallClient.cpp FunctionCallServer.cpp Scheduler.cpp - SnapshotServer.cpp - SnapshotClient.cpp MpiContext.cpp MpiMessageBuffer.cpp MpiWorldRegistry.cpp @@ -18,4 +16,4 @@ set(LIB_FILES faabric_lib(scheduler "${LIB_FILES}") -target_link_libraries(scheduler flat proto snapshot state faabricmpi redis transport) +target_link_libraries(scheduler snapshot state faabricmpi redis) diff --git a/src/snapshot/CMakeLists.txt b/src/snapshot/CMakeLists.txt index 85fe874e3..c26f94b8e 100644 --- a/src/snapshot/CMakeLists.txt +++ b/src/snapshot/CMakeLists.txt @@ -1,10 +1,12 @@ file(GLOB HEADERS "${FAABRIC_INCLUDE_DIR}/faabric/snapshot/*.h") set(LIB_FILES + SnapshotClient.cpp SnapshotRegistry.cpp + SnapshotServer.cpp ${HEADERS} ) faabric_lib(snapshot "${LIB_FILES}") -target_link_libraries(snapshot proto util) +target_link_libraries(snapshot proto flat transport util) diff --git a/src/scheduler/SnapshotClient.cpp b/src/snapshot/SnapshotClient.cpp similarity index 100% rename from src/scheduler/SnapshotClient.cpp rename to src/snapshot/SnapshotClient.cpp diff --git a/src/scheduler/SnapshotServer.cpp b/src/snapshot/SnapshotServer.cpp similarity index 100% rename from src/scheduler/SnapshotServer.cpp rename to src/snapshot/SnapshotServer.cpp From 79d81d77c3784a0487cbe23442022dd6f8b4cdb6 Mon Sep 17 00:00:00 2001 From: Simon Shillaker Date: Mon, 5 Jul 2021 16:43:42 +0000 Subject: [PATCH 3/4] Fix compilation --- include/faabric/runner/FaabricMain.h | 4 +-- include/faabric/scheduler/Scheduler.h | 4 +-- include/faabric/snapshot/SnapshotApi.h | 2 +- include/faabric/snapshot/SnapshotClient.h | 6 ++--- include/faabric/snapshot/SnapshotServer.h | 4 +-- src/scheduler/Scheduler.cpp | 5 ++-- src/snapshot/SnapshotClient.cpp | 4 +-- src/snapshot/SnapshotServer.cpp | 12 ++++----- tests/dist/scheduler/test_snapshots.cpp | 2 +- tests/test/scheduler/test_executor.cpp | 25 +++++++++---------- tests/test/scheduler/test_scheduler.cpp | 8 +++--- .../scheduler/test_snapshot_client_server.cpp | 8 +++--- tests/utils/fixtures.h | 4 +-- tests/utils/system_utils.cpp | 4 +-- 14 files changed, 46 insertions(+), 46 deletions(-) diff --git a/include/faabric/runner/FaabricMain.h b/include/faabric/runner/FaabricMain.h index 11c94e7d7..7f49f7727 100644 --- a/include/faabric/runner/FaabricMain.h +++ b/include/faabric/runner/FaabricMain.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -28,6 +28,6 @@ class FaabricMain private: faabric::state::StateServer stateServer; faabric::scheduler::FunctionCallServer functionServer; - faabric::scheduler::SnapshotServer snapshotServer; + faabric::snapshot::SnapshotServer snapshotServer; }; } diff --git a/include/faabric/scheduler/Scheduler.h b/include/faabric/scheduler/Scheduler.h index 68ed0574e..423e1f860 100644 --- a/include/faabric/scheduler/Scheduler.h +++ b/include/faabric/scheduler/Scheduler.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -187,7 +187,7 @@ class Scheduler faabric::scheduler::FunctionCallClient& getFunctionCallClient( const std::string& otherHost); - faabric::scheduler::SnapshotClient& getSnapshotClient( + faabric::snapshot::SnapshotClient& getSnapshotClient( const std::string& otherHost); faabric::HostResources thisHostResources; diff --git a/include/faabric/snapshot/SnapshotApi.h b/include/faabric/snapshot/SnapshotApi.h index c6cafc3a7..82e93f792 100644 --- a/include/faabric/snapshot/SnapshotApi.h +++ b/include/faabric/snapshot/SnapshotApi.h @@ -1,6 +1,6 @@ #pragma once -namespace faabric::scheduler { +namespace faabric::snapshot { enum SnapshotCalls { NoSnapshotCall = 0, diff --git a/include/faabric/snapshot/SnapshotClient.h b/include/faabric/snapshot/SnapshotClient.h index 6d7fd8655..3129755ce 100644 --- a/include/faabric/snapshot/SnapshotClient.h +++ b/include/faabric/snapshot/SnapshotClient.h @@ -1,12 +1,12 @@ #pragma once #include -#include +#include #include #include #include -namespace faabric::scheduler { +namespace faabric::snapshot { // ----------------------------------- // Mocking @@ -57,6 +57,6 @@ class SnapshotClient final : public faabric::transport::MessageEndpointClient const std::vector& diffs); private: - void sendHeader(faabric::scheduler::SnapshotCalls call); + void sendHeader(faabric::snapshot::SnapshotCalls call); }; } diff --git a/include/faabric/snapshot/SnapshotServer.h b/include/faabric/snapshot/SnapshotServer.h index 861bb75a0..9919c282b 100644 --- a/include/faabric/snapshot/SnapshotServer.h +++ b/include/faabric/snapshot/SnapshotServer.h @@ -2,10 +2,10 @@ #include #include -#include +#include #include -namespace faabric::scheduler { +namespace faabric::snapshot { class SnapshotServer final : public faabric::transport::MessageEndpointServer { public: diff --git a/src/scheduler/Scheduler.cpp b/src/scheduler/Scheduler.cpp index 88a66ad78..f179ff063 100644 --- a/src/scheduler/Scheduler.cpp +++ b/src/scheduler/Scheduler.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -19,6 +19,7 @@ #define FLUSH_TIMEOUT_MS 10000 using namespace faabric::util; +using namespace faabric::snapshot; namespace faabric::scheduler { @@ -31,7 +32,7 @@ static thread_local std::unordered_map + faabric::snapshot::SnapshotClient> snapshotClients; Scheduler& getScheduler() diff --git a/src/snapshot/SnapshotClient.cpp b/src/snapshot/SnapshotClient.cpp index d3f80bc87..deff086dd 100644 --- a/src/snapshot/SnapshotClient.cpp +++ b/src/snapshot/SnapshotClient.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -6,7 +6,7 @@ #include #include -namespace faabric::scheduler { +namespace faabric::snapshot { // ----------------------------------- // Mocking diff --git a/src/snapshot/SnapshotServer.cpp b/src/snapshot/SnapshotServer.cpp index f220b5ed1..be6bf086f 100644 --- a/src/snapshot/SnapshotServer.cpp +++ b/src/snapshot/SnapshotServer.cpp @@ -1,7 +1,7 @@ #include #include -#include #include +#include #include #include #include @@ -10,7 +10,7 @@ #include -namespace faabric::scheduler { +namespace faabric::snapshot { SnapshotServer::SnapshotServer() : faabric::transport::MessageEndpointServer(SNAPSHOT_ASYNC_PORT, SNAPSHOT_SYNC_PORT) @@ -21,11 +21,11 @@ void SnapshotServer::doAsyncRecv(int header, size_t bufferSize) { switch (header) { - case faabric::scheduler::SnapshotCalls::DeleteSnapshot: { + case faabric::snapshot::SnapshotCalls::DeleteSnapshot: { this->recvDeleteSnapshot(buffer, bufferSize); break; } - case faabric::scheduler::SnapshotCalls::ThreadResult: { + case faabric::snapshot::SnapshotCalls::ThreadResult: { this->recvThreadResult(buffer, bufferSize); break; } @@ -40,10 +40,10 @@ std::unique_ptr SnapshotServer::doSyncRecv(int header, const uint8_t* buffer, size_t bufferSize) { switch (header) { - case faabric::scheduler::SnapshotCalls::PushSnapshot: { + case faabric::snapshot::SnapshotCalls::PushSnapshot: { return recvPushSnapshot(buffer, bufferSize); } - case faabric::scheduler::SnapshotCalls::PushSnapshotDiffs: { + case faabric::snapshot::SnapshotCalls::PushSnapshotDiffs: { return recvPushSnapshotDiffs(buffer, bufferSize); } default: { diff --git a/tests/dist/scheduler/test_snapshots.cpp b/tests/dist/scheduler/test_snapshots.cpp index 93d1c613e..9298c5ce7 100644 --- a/tests/dist/scheduler/test_snapshots.cpp +++ b/tests/dist/scheduler/test_snapshots.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/tests/test/scheduler/test_executor.cpp b/tests/test/scheduler/test_executor.cpp index 87a87b236..806bf3cc7 100644 --- a/tests/test/scheduler/test_executor.cpp +++ b/tests/test/scheduler/test_executor.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -468,7 +468,7 @@ TEST_CASE_METHOD(TestExecutorFixture, REQUIRE(actualHost == otherHost); // Check the snapshot has been pushed to the other host - auto snapPushes = faabric::scheduler::getSnapshotPushes(); + auto snapPushes = faabric::snapshot::getSnapshotPushes(); REQUIRE(snapPushes.size() == 1); REQUIRE(snapPushes.at(0).first == otherHost); @@ -484,7 +484,7 @@ TEST_CASE_METHOD(TestExecutorFixture, REQUIRE(restoreCount == 1); // Process the thread result requests - auto results = faabric::scheduler::getThreadResults(); + auto results = faabric::snapshot::getThreadResults(); for (auto& r : results) { REQUIRE(r.first == thisHost); @@ -524,8 +524,8 @@ TEST_CASE_METHOD(TestExecutorFixture, // Note that because the results don't actually get logged on this host, we // can't wait on them as usual. - auto actual = faabric::scheduler::getThreadResults(); - REQUIRE_RETRY(actual = faabric::scheduler::getThreadResults(), + auto actual = faabric::snapshot::getThreadResults(); + REQUIRE_RETRY(actual = faabric::snapshot::getThreadResults(), actual.size() == nThreads); std::vector actualMessageIds; @@ -714,11 +714,10 @@ TEST_CASE_METHOD(TestExecutorFixture, // Results aren't set on this host as it's not the master, so we have to // wait - REQUIRE_RETRY({}, - faabric::scheduler::getThreadResults().size() == nThreads); + REQUIRE_RETRY({}, faabric::snapshot::getThreadResults().size() == nThreads); // Check results have been sent back to the master host - auto actualResults = faabric::scheduler::getThreadResults(); + auto actualResults = faabric::snapshot::getThreadResults(); REQUIRE(actualResults.size() == nThreads); // Check only one has diffs attached @@ -814,18 +813,18 @@ TEST_CASE_METHOD(TestExecutorFixture, REQUIRE(sch.getFunctionRegisteredHosts(msg) == expectedRegistered); // Check snapshot has been pushed - auto pushes = faabric::scheduler::getSnapshotPushes(); + auto pushes = faabric::snapshot::getSnapshotPushes(); REQUIRE(pushes.at(0).first == otherHost); REQUIRE(pushes.at(0).second.size == snapshotSize); - REQUIRE(faabric::scheduler::getSnapshotDiffPushes().empty()); + REQUIRE(faabric::snapshot::getSnapshotDiffPushes().empty()); // Check that we're not registering any dirty pages on the snapshot faabric::util::SnapshotData& snap = reg.getSnapshot(snapshotKey); REQUIRE(snap.getDirtyPages().empty()); // Now reset snapshot pushes of all kinds - faabric::scheduler::clearMockSnapshotRequests(); + faabric::snapshot::clearMockSnapshotRequests(); // Make an edit to the snapshot memory and get the expected diffs snap.data[0] = 9; @@ -850,10 +849,10 @@ TEST_CASE_METHOD(TestExecutorFixture, sch.awaitThreadResult(reqB->mutable_messages()->at(0).id()); // Check the full snapshot hasn't been pushed - REQUIRE(faabric::scheduler::getSnapshotPushes().empty()); + REQUIRE(faabric::snapshot::getSnapshotPushes().empty()); // Check the diffs are pushed as expected - auto diffPushes = faabric::scheduler::getSnapshotDiffPushes(); + auto diffPushes = faabric::snapshot::getSnapshotDiffPushes(); REQUIRE(diffPushes.size() == 1); REQUIRE(diffPushes.at(0).first == otherHost); std::vector actualDiffs = diff --git a/tests/test/scheduler/test_scheduler.cpp b/tests/test/scheduler/test_scheduler.cpp index dfb00fe9c..2a0e41c44 100644 --- a/tests/test/scheduler/test_scheduler.cpp +++ b/tests/test/scheduler/test_scheduler.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -272,7 +272,7 @@ TEST_CASE_METHOD(SlowExecutorFixture, "Test batch scheduling", "[scheduler]") REQUIRE(resRequestsOne.at(0).first == otherHost); // Check snapshots have been pushed - auto snapshotPushes = faabric::scheduler::getSnapshotPushes(); + auto snapshotPushes = faabric::snapshot::getSnapshotPushes(); if (expectedSnapshot.empty()) { REQUIRE(snapshotPushes.empty()); } else { @@ -749,7 +749,7 @@ TEST_CASE_METHOD(SlowExecutorFixture, for (auto h : expectedHosts) { expectedDeleteRequests.push_back({ h, snapKey }); }; - auto actualDeleteRequests = faabric::scheduler::getSnapshotDeletes(); + auto actualDeleteRequests = faabric::snapshot::getSnapshotDeletes(); REQUIRE(actualDeleteRequests == expectedDeleteRequests); } @@ -791,7 +791,7 @@ TEST_CASE_METHOD(SlowExecutorFixture, } // Check the results have been pushed along with the thread result - auto actualResults = faabric::scheduler::getThreadResults(); + auto actualResults = faabric::snapshot::getThreadResults(); REQUIRE(actualResults.size() == 1); REQUIRE(actualResults.at(0).first == "otherHost"); diff --git a/tests/test/scheduler/test_snapshot_client_server.cpp b/tests/test/scheduler/test_snapshot_client_server.cpp index 9319c76ed..b6b3ef8db 100644 --- a/tests/test/scheduler/test_snapshot_client_server.cpp +++ b/tests/test/scheduler/test_snapshot_client_server.cpp @@ -4,9 +4,9 @@ #include -#include -#include +#include #include +#include #include #include #include @@ -22,8 +22,8 @@ class SnapshotClientServerFixture , public SnapshotTestFixture { protected: - faabric::scheduler::SnapshotServer server; - faabric::scheduler::SnapshotClient cli; + faabric::snapshot::SnapshotServer server; + faabric::snapshot::SnapshotClient cli; public: SnapshotClientServerFixture() diff --git a/tests/utils/fixtures.h b/tests/utils/fixtures.h index eeac4aa9f..7b03b79e2 100644 --- a/tests/utils/fixtures.h +++ b/tests/utils/fixtures.h @@ -68,7 +68,7 @@ class SchedulerTestFixture faabric::util::setTestMode(true); faabric::scheduler::clearMockRequests(); - faabric::scheduler::clearMockSnapshotRequests(); + faabric::snapshot::clearMockSnapshotRequests(); sch.shutdown(); sch.addHostToGlobalSet(); @@ -80,7 +80,7 @@ class SchedulerTestFixture faabric::util::setTestMode(true); faabric::scheduler::clearMockRequests(); - faabric::scheduler::clearMockSnapshotRequests(); + faabric::snapshot::clearMockSnapshotRequests(); sch.shutdown(); sch.addHostToGlobalSet(); diff --git a/tests/utils/system_utils.cpp b/tests/utils/system_utils.cpp index 3e67bf3ea..411a0bf92 100644 --- a/tests/utils/system_utils.cpp +++ b/tests/utils/system_utils.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -50,7 +50,7 @@ void cleanFaabric() faabric::util::setTestMode(true); faabric::util::setMockMode(false); faabric::scheduler::clearMockRequests(); - faabric::scheduler::clearMockSnapshotRequests(); + faabric::snapshot::clearMockSnapshotRequests(); // Set up dummy executor factory std::shared_ptr fac = From 37c2ff896102e30aefbd3ebdd0bd706bc0a7936f Mon Sep 17 00:00:00 2001 From: Simon Shillaker Date: Mon, 5 Jul 2021 16:45:03 +0000 Subject: [PATCH 4/4] Revert unrelated change --- src/scheduler/Scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scheduler/Scheduler.cpp b/src/scheduler/Scheduler.cpp index f179ff063..0869a9be0 100644 --- a/src/scheduler/Scheduler.cpp +++ b/src/scheduler/Scheduler.cpp @@ -268,7 +268,7 @@ std::vector Scheduler::callFunctions( "Empty snapshot for distributed threads/ processes"); } - if (snapshotNeeded && !registeredHosts.empty()) { + if (snapshotNeeded) { snapshotData = faabric::snapshot::getSnapshotRegistry().getSnapshot(snapshotKey); snapshotDiffs = snapshotData.getDirtyPages();