wsd: admin: use the cgroup limit as available memory

This applies the cgroup memory limit, if set, so that we do not exceed it even when it is lower than the memory allowed by the configured memproportion percentage. Otherwise, we risk exceeding our cgroup limit, and by then it is too late to do anything but die due to OOM. This also moves the logging of the cgroup memory stats from COOLWSD into Admin, to avoid duplicate logging. Also updated the description of the memproportion config entry to account for the cgroup logic. Change-Id: I870ae61c1260eb2b3275bd2fa1a4c48ff30957a2 Signed-off-by: Ashod Nakashian <ashod.nakashian@collabora.co.uk>
2023-12-29 08:07:31 -05:00 · 2023-12-29 08:07:31 -05:00 · 373c04fe4e
parent a44e9411a8
commit 373c04fe4e
3 changed files with 54 additions and 30 deletions
--- a/coolwsd.xml.in
+++ b/coolwsd.xml.in
@ -43,7 +43,7 @@
    <hexify_embedded_urls desc="Enable to protect encoded URLs from getting decoded by intermediate hops. Particularly useful on Azure deployments" type="bool" default="false"></hexify_embedded_urls>
    <experimental_features desc="Enable/Disable experimental features" type="bool" default="@ENABLE_EXPERIMENTAL@">@ENABLE_EXPERIMENTAL@</experimental_features>

-    <memproportion desc="The maximum percentage of system memory consumed by all of the @APP_NAME@, after which we start cleaning up idle documents" type="double" default="80.0"></memproportion>
+    <memproportion desc="The maximum percentage of available memory consumed by all of the @APP_NAME@ processes, after which we start cleaning up idle documents. If cgroup memory limits are set, this is the maximum percentage of that limit to consume." type="double" default="80.0"></memproportion>
    <num_prespawn_children desc="Number of child processes to keep started in advance and waiting for new clients." type="uint" default="1">1</num_prespawn_children>
    <!-- <fetch_update_check desc="Every number of hours will fetch latest version data. Defaults to 10 hours." type="uint" default="10">10</fetch_update_check> -->
    <per_document desc="Document-specific settings, including LO Core settings.">
--- a/wsd/Admin.cpp
+++ b/wsd/Admin.cpp
@ -514,15 +514,62 @@ Admin::Admin()

    LOG_TRC("Total system memory:  " << _totalSysMemKb << " KB");

-    const auto memLimit = COOLWSD::getConfigValue<double>("memproportion", 0.0);
+    // If there is a cgroup limit that is smaller still, apply it.
+    const std::size_t cgroupMemLimitKb = Util::getCGroupMemLimit() / 1024;
+    if (cgroupMemLimitKb > 0 && cgroupMemLimitKb < _totalAvailMemKb)
+    {
+        LOG_TRC("cgroup memory limit: " << cgroupMemLimitKb << " KB");
+        _totalAvailMemKb = cgroupMemLimitKb;
+    }
+    else
+        LOG_TRC("no cgroup memory limit");
+
+    // If there is a cgroup soft-limit that is smaller still, apply that.
+    const std::size_t cgroupMemSoftLimitKb = Util::getCGroupMemSoftLimit() / 1024;
+    if (cgroupMemSoftLimitKb > 0 && cgroupMemSoftLimitKb < _totalAvailMemKb)
+    {
+        LOG_TRC("cgroup memory soft limit: " << cgroupMemSoftLimitKb << " KB");
+        _totalAvailMemKb = cgroupMemSoftLimitKb;
+    }
+    else
+        LOG_TRC("no cgroup memory soft limit");
+
+    // Reserve some minimum memory (1 MB, arbitrarily)
+    // as headroom. Otherwise, coolwsd might fail to
+    // clean up Kits when we run out, and by then we die.
+    // This should be enough to update DocBroker containers,
+    // take locks, print logs, etc. during cleanup.
+    std::size_t minHeadroomKb = 1024;
+
+    // If we have a manual percentage cap, apply it.
+    const double memLimit = COOLWSD::getConfigValue<double>("memproportion", 0.0);
    if (memLimit > 0.0)
-        _totalAvailMemKb = _totalSysMemKb * memLimit / 100.;
+    {
+        const double headroom = _totalAvailMemKb * (100. - memLimit) / 100.;
+        if (minHeadroomKb < headroom)
+            minHeadroomKb = static_cast<std::size_t>(headroom);
+    }

-    LOG_TRC("Total available memory: " << _totalAvailMemKb << " KB (memproportion: " << memLimit << "%).");
+    if (_totalAvailMemKb > minHeadroomKb)
+    {
+        _totalAvailMemKb -= minHeadroomKb;
+    }

-    const size_t totalMem = getTotalMemoryUsage();
-    LOG_TRC("Total memory used: " << totalMem << " KB.");
-    _model.addMemStats(totalMem);
+    const size_t totalUsedMemKb = getTotalMemoryUsage();
+    _model.addMemStats(totalUsedMemKb);
+
+    LOG_INF("Total available memory: "
+            << _totalAvailMemKb << " KB, System memory: " << _totalSysMemKb
+            << " KB, configured memproportion: " << memLimit
+            << "%, actual percentage of system total: " << std::setprecision(2)
+            << _totalAvailMemKb * 100. / _totalSysMemKb << "%, current usage: " << totalUsedMemKb
+            << " KB (" << totalUsedMemKb * 100. / _totalAvailMemKb << "% of limit)");
+
+    if (_totalAvailMemKb < 1000 * 1024)
+        LOG_WRN("Low memory condition detected: only " << _totalAvailMemKb / 1024
+                                                       << " MB of RAM available");
+
+    LOG_INF("hardware threads: " << std::thread::hardware_concurrency());
 }

 Admin::~Admin()
--- a/wsd/COOLWSD.cpp
+++ b/wsd/COOLWSD.cpp
@ -5962,29 +5962,6 @@ int COOLWSD::innerMain()
    Util::getVersionInfo(version, hash);
    LOG_INF("Coolwsd version details: " << version << " - " << hash << " - id " << Util::getProcessIdentifier() << " - on " << Util::getLinuxVersion());

-    std::size_t availableMemoryMb = Util::getTotalSystemMemoryKb()/1024;
-    LOG_INF("available memory: " << availableMemoryMb << " MB");
-    std::size_t cgroupMemLimitMb = Util::getCGroupMemLimit()/(1024*1024);
-    if (cgroupMemLimitMb > 0 && cgroupMemLimitMb < availableMemoryMb)
-    {
-        LOG_INF("cgroup memory limit: " << cgroupMemLimitMb << " MB");
-        availableMemoryMb = cgroupMemLimitMb;
-    }
-    else
-        LOG_INF("no cgroup memory limit");
-
-    std::size_t cgroupMemSoftLimitMb = Util::getCGroupMemSoftLimit()/(1024*1024);
-    if (cgroupMemSoftLimitMb > 0 && cgroupMemSoftLimitMb < availableMemoryMb)
-    {
-        LOG_INF("cgroup memory soft limit: " << cgroupMemSoftLimitMb << " MB");
-        availableMemoryMb = cgroupMemSoftLimitMb;
-    }
-    else
-        LOG_INF("no cgroup memory soft limit");
-    LOG_INF("hardware threads: " << std::thread::hardware_concurrency());
-
-    if (availableMemoryMb < 1000)
-        LOG_WRN("Low memory condition detected: only " << availableMemoryMb << " MB of RAM available");
 #endif

    initializeSSL();