From 31f1ce360a4b2bf0b234a967a7af5112b3e29625 Mon Sep 17 00:00:00 2001 From: Michael Meeks Date: Thu, 21 Mar 2024 16:11:37 +0000 Subject: [PATCH] crash test - cope with more complexity around re-starting. With faster starting spare kits we can end up waiting for a state where there are no live or spare kits, but the spare kit has already beaten us to the punch: [testCrashKit] (+1124ms): Killing coolkit instances.| httpcrashtest.cpp:151 [killPid] (+1124ms): Killing 1650047| httpcrashtest.cpp:257 [killPid] (+1124ms): Killing 1650071| httpcrashtest.cpp:257 [waitForKitProcessCount] (+1124ms): Waiting for kit process count: Doc Kits: 0 Spare Kits: 0 | KitPidHelpers.cpp:70 [waitForKitProcessCount] (+1124ms): Current kit processes: Doc Kits: [1650047, ] Spare Kits: [1650071, ]| KitPidHelpers.cpp:80 ... Forking a coolkit process with jailId: 09X67pOy1HAgSk9G as spare coolkit #5.| kit/ForKit.cpp:406 ... [waitForKitProcessCount] (+1558ms): Current kit processes: Doc Kits: [] Spare Kits: [1650083, ]| KitPidHelpers.cpp:94 ... fail ... Avoid this by intersecting before & after to ensure are all before are dead. Cleanup exposing the more problematic wait-for-zero-spare method. Signed-off-by: Michael Meeks Change-Id: I6f0ba87139b58f9bf1770ebbc5cac95b5063679e --- test/KitPidHelpers.cpp | 63 +++++++++++++++++++++++++++++++++++------- test/KitPidHelpers.hpp | 16 ++++------- test/httpcrashtest.cpp | 34 ++++------------------- 3 files changed, 65 insertions(+), 48 deletions(-) diff --git a/test/KitPidHelpers.cpp b/test/KitPidHelpers.cpp index a9ac79baf2..8183f8ecf9 100644 --- a/test/KitPidHelpers.cpp +++ b/test/KitPidHelpers.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -58,7 +59,8 @@ void helpers::logKitProcesses(const std::string& testname) << " Spare Kits: " << getPidList(spareKitPids)); } -void helpers::waitForKitProcessCount( +namespace { +void waitForKitProcessCount( const std::string& testname, int numDocKits, int numSpareKits /* = -1 */, @@ -69,8 +71,8 @@ void helpers::waitForKitProcessCount( << (numDocKits >= 0 ? "Doc Kits: " + std::to_string(numDocKits) + " " : "") << (numSpareKits >= 0 ? " Spare Kits: " + std::to_string(numSpareKits) + " " : "")); - std::set docKitPids = getDocKitPids(); - std::set spareKitPids = getSpareKitPids(); + std::set docKitPids = helpers::getDocKitPids(); + std::set spareKitPids = helpers::getSpareKitPids(); bool pass = (numDocKits < 0 || docKitPids.size() == static_cast(numDocKits)) && (numSpareKits < 0 || spareKitPids.size() == static_cast(numSpareKits)); int tries = (timeoutMs / retryMs); @@ -83,8 +85,8 @@ void helpers::waitForKitProcessCount( { std::this_thread::sleep_for(retryMs); - docKitPids = getDocKitPids(); - spareKitPids = getSpareKitPids(); + docKitPids = helpers::getDocKitPids(); + spareKitPids = helpers::getSpareKitPids(); pass = (numDocKits < 0 || docKitPids.size() == static_cast(numDocKits)) && (numSpareKits < 0 || spareKitPids.size() == static_cast(numSpareKits)); tries--; @@ -111,6 +113,7 @@ void helpers::waitForKitProcessCount( LOK_ASSERT_FAIL("Timed out waiting for kit process count: " + oss.str()); } } +} void helpers::waitForKitPidsReady( const std::string& testname, @@ -120,10 +123,50 @@ void helpers::waitForKitPidsReady( waitForKitProcessCount(testname, 0, 1, timeoutMs, retryMs); } -void helpers::waitForKitPidsKilled( - const std::string& testname, - const std::chrono::milliseconds timeoutMs /* = KIT_PID_TIMEOUT_MS */, - const std::chrono::milliseconds retryMs /* = KIT_PID_RETRY_MS */) +void helpers::killPid(const std::string& testname, const pid_t pid) { - waitForKitProcessCount(testname, 0, 0, timeoutMs, retryMs); + TST_LOG("Killing " << pid); + if (kill(pid, SIGKILL) == -1) + TST_LOG("kill(" << pid << ", SIGKILL) failed: " << + Util::symbolicErrno(errno) << ": " << std::strerror(errno)); +} + +void helpers::killAllKitProcesses( + const std::string& testname, + const std::chrono::milliseconds timeoutMs /* = COMMAND_TIMEOUT_MS * 8 */, + const std::chrono::milliseconds retryMs /* = 10ms */) +{ + auto before = getKitPids(); + for (pid_t pid : before) + killPid(testname, pid); + + TST_LOG("Waiting for these kit processes to close: " << getPidList(before)); + + bool pass = false; + int tries = (timeoutMs / retryMs); + + std::set inters; + while (tries >= 0 && !pass) + { + std::this_thread::sleep_for(retryMs); + + auto current = getKitPids(); + + inters.clear(); + std::set_intersection( + current.begin(), current.end(), before.begin(), before.end(), + std::inserter(inters, inters.begin())); + + pass = inters.empty(); + tries--; + + TST_LOG("Current kit processes: " << getPidList(current)); + } + + TST_LOG("Finished waiting for all previous kit processes to close" + << " before: " << getPidList(before) + << " current: " << getPidList(getKitPids()) + << " intersection: " << getPidList(inters)); + + LOK_ASSERT_MESSAGE_SILENT("Timed out waiting for these kit processes to close", pass); } diff --git a/test/KitPidHelpers.hpp b/test/KitPidHelpers.hpp index 5dc17393a0..3d58099391 100644 --- a/test/KitPidHelpers.hpp +++ b/test/KitPidHelpers.hpp @@ -51,12 +51,9 @@ pid_t getForKitPid(); void logKitProcesses(const std::string& testname); /* - * Wait until the number of kit processes matches the desired count. - * Set numDocKits and numSpareKits to -1 to skip counting those processes + * SIGKILL relevant pid */ -void waitForKitProcessCount(const std::string& testname, int numDocKits, int numSpareKits, - const std::chrono::milliseconds timeoutMs, - const std::chrono::milliseconds retryMs); +void killPid(const std::string& testname, const pid_t pid); /* * Wait until ready with 0 doc kits and 1 spare kit @@ -69,11 +66,10 @@ void waitForKitPidsReady( const std::chrono::milliseconds retryMs = std::chrono::milliseconds(KIT_PID_RETRY_MS)); /* - * Wait until all doc and spare kits are closed + * Kill all and wait for previous processes to die */ -void waitForKitPidsKilled( - const std::string& testname, - const std::chrono::milliseconds timeoutMs = std::chrono::milliseconds(KIT_PID_TIMEOUT_MS), - const std::chrono::milliseconds retryMs = std::chrono::milliseconds(KIT_PID_RETRY_MS)); +void killAllKitProcesses(const std::string& testname, + const std::chrono::milliseconds timeoutMs = std::chrono::milliseconds(KIT_PID_TIMEOUT_MS), + const std::chrono::milliseconds retryMs = std::chrono::milliseconds(KIT_PID_RETRY_MS)); } // namespace helpers diff --git a/test/httpcrashtest.cpp b/test/httpcrashtest.cpp index 669581ad33..649ecad314 100644 --- a/test/httpcrashtest.cpp +++ b/test/httpcrashtest.cpp @@ -64,9 +64,7 @@ class HTTPCrashTest : public CPPUNIT_NS::TestFixture void testRecoverAfterKitCrash(); void testCrashForkit(); - void killPid(const pid_t pid, const std::string& testname); void killDocKitProcesses(const std::string& testname); - void killAllKitProcesses(const std::string& testname); public: HTTPCrashTest() @@ -114,8 +112,7 @@ void HTTPCrashTest::testBarren() try { TST_LOG("Killing all kits"); - killAllKitProcesses(testname); - waitForKitPidsKilled(testname); + helpers::killAllKitProcesses(testname); // Do not wait for spare kit to start up here @@ -150,8 +147,7 @@ void HTTPCrashTest::testCrashKit() std::this_thread::sleep_for(std::chrono::seconds(1)); TST_LOG("Killing coolkit instances."); - killAllKitProcesses(testname); - waitForKitPidsKilled(testname); + helpers::killAllKitProcesses(testname); // TST_LOG("Reading the error code from the socket."); //FIXME: implement in WebSocketSession. @@ -186,7 +182,6 @@ void HTTPCrashTest::testRecoverAfterKitCrash() TST_LOG("Killing coolkit instances."); killDocKitProcesses(testname); - waitForKitPidsReady(testname); TST_LOG("Reconnect after kill."); std::shared_ptr socket2 = loadDocAndGetSession( @@ -222,7 +217,7 @@ void HTTPCrashTest::testCrashForkit() std::this_thread::sleep_for(std::chrono::seconds(1)); TST_LOG("Killing forkit."); - killPid(getForKitPid(), testname); + helpers::killPid(testname, getForKitPid()); TST_LOG("Communicating after kill."); sendTextFrame(socket, "status", testname); @@ -234,8 +229,7 @@ void HTTPCrashTest::testCrashForkit() socket->waitForDisconnection(std::chrono::seconds(5))); TST_LOG("Killing coolkit."); - killAllKitProcesses(testname); - waitForKitPidsKilled(testname); + helpers::killAllKitProcesses(testname); // Forkit should restart waitForKitPidsReady(testname); @@ -255,29 +249,13 @@ void HTTPCrashTest::testCrashForkit() } } -void HTTPCrashTest::killPid(const pid_t pid, const std::string& testname) -{ - TST_LOG("Killing " << pid); - if (kill(pid, SIGKILL) == -1) - { - TST_LOG("kill(" << pid << ", SIGKILL) failed: " << Util::symbolicErrno(errno) << ": " << std::strerror(errno)); - } -} - void HTTPCrashTest::killDocKitProcesses(const std::string& testname) { for (pid_t pid : getDocKitPids()) { - killPid(pid, testname); - } -} - -void HTTPCrashTest::killAllKitProcesses(const std::string& testname) -{ - for (pid_t pid : getKitPids()) - { - killPid(pid, testname); + helpers::killPid(testname, pid); } + waitForKitPidsReady(testname); } CPPUNIT_TEST_SUITE_REGISTRATION(HTTPCrashTest);