core: Check unit start rate limiting earlier
authorDaan De Meyer <daan.j.demeyer@gmail.com>
Tue, 24 Aug 2021 15:46:47 +0000 (16:46 +0100)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 12 Oct 2021 15:48:59 +0000 (17:48 +0200)
Fixes #17433. Currently, if any of the validations we do before we
check start rate limiting fail, we can still enter a busy loop as
no rate limiting gets applied. A common occurence of this scenario
is path units triggering a service that fails a condition check.

To fix the issue, we simply move up start rate limiting checks to
be the first thing we do when starting a unit. To achieve this,
we add a new method to the unit vtable and implement it for the
relevant unit types so that we can do the start rate limit checks
earlier on.

(cherry picked from commit 9727f2427ff6b2e1f4ab927cc57ad8e888f04e95)
(cherry picked from commit ed8fbbf1745c6a2dc0b8cd560ac8a3353f72e979)

16 files changed:
src/core/automount.c
src/core/mount.c
src/core/path.c
src/core/service.c
src/core/socket.c
src/core/swap.c
src/core/timer.c
src/core/unit.c
src/core/unit.h
test/TEST-63-ISSUE-17433/Makefile [new symlink]
test/TEST-63-ISSUE-17433/test.sh [new file with mode: 0755]
test/meson.build
test/testsuite-10.units/test10.service
test/testsuite-63.units/test63.path [new file with mode: 0644]
test/testsuite-63.units/test63.service [new file with mode: 0644]
test/units/testsuite-63.service [new file with mode: 0644]

index fa7f64b71af3a573b5935efad5769c893e9c7411..907497bdaaab7249f8fe1d78fcffaf283656e5f1 100644 (file)
@@ -814,12 +814,6 @@ static int automount_start(Unit *u) {
         if (r < 0)
                 return r;
 
-        r = unit_test_start_limit(u);
-        if (r < 0) {
-                automount_enter_dead(a, AUTOMOUNT_FAILURE_START_LIMIT_HIT);
-                return r;
-        }
-
         r = unit_acquire_invocation_id(u);
         if (r < 0)
                 return r;
@@ -1065,6 +1059,21 @@ static bool automount_supported(void) {
         return supported;
 }
 
+static int automount_test_start_limit(Unit *u) {
+        Automount *a = AUTOMOUNT(u);
+        int r;
+
+        assert(a);
+
+        r = unit_test_start_limit(u);
+        if (r < 0) {
+                automount_enter_dead(a, AUTOMOUNT_FAILURE_START_LIMIT_HIT);
+                return r;
+        }
+
+        return 0;
+}
+
 static const char* const automount_result_table[_AUTOMOUNT_RESULT_MAX] = {
         [AUTOMOUNT_SUCCESS] = "success",
         [AUTOMOUNT_FAILURE_RESOURCES] = "resources",
@@ -1127,4 +1136,6 @@ const UnitVTable automount_vtable = {
                         [JOB_FAILED]     = "Failed to unset automount %s.",
                 },
         },
+
+        .test_start_limit = automount_test_start_limit,
 };
index ec69f5599ffa12e1bcd0f50677935ad03cbc7d33..dc272d9ab403c8f28ed3df03224c341b84b9fc28 100644 (file)
@@ -1145,12 +1145,6 @@ static int mount_start(Unit *u) {
 
         assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
 
-        r = unit_test_start_limit(u);
-        if (r < 0) {
-                mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT);
-                return r;
-        }
-
         r = unit_acquire_invocation_id(u);
         if (r < 0)
                 return r;
@@ -2116,6 +2110,21 @@ static int mount_can_clean(Unit *u, ExecCleanMask *ret) {
         return exec_context_get_clean_mask(&m->exec_context, ret);
 }
 
+static int mount_test_start_limit(Unit *u) {
+        Mount *m = MOUNT(u);
+        int r;
+
+        assert(m);
+
+        r = unit_test_start_limit(u);
+        if (r < 0) {
+                mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT);
+                return r;
+        }
+
+        return 0;
+}
+
 static const char* const mount_exec_command_table[_MOUNT_EXEC_COMMAND_MAX] = {
         [MOUNT_EXEC_MOUNT] = "ExecMount",
         [MOUNT_EXEC_UNMOUNT] = "ExecUnmount",
@@ -2213,4 +2222,6 @@ const UnitVTable mount_vtable = {
                         [JOB_TIMEOUT]    = "Timed out unmounting %s.",
                 },
         },
+
+        .test_start_limit = mount_test_start_limit,
 };
index c055fcb3d22d1d705caff072b6e2fad9ed8e0f36..9c3b09ec0344aacdec519e5581dd7862eaf5c6b2 100644 (file)
@@ -566,12 +566,6 @@ static int path_start(Unit *u) {
         if (r < 0)
                 return r;
 
-        r = unit_test_start_limit(u);
-        if (r < 0) {
-                path_enter_dead(p, PATH_FAILURE_START_LIMIT_HIT);
-                return r;
-        }
-
         r = unit_acquire_invocation_id(u);
         if (r < 0)
                 return r;
@@ -787,6 +781,21 @@ static void path_reset_failed(Unit *u) {
         p->result = PATH_SUCCESS;
 }
 
+static int path_test_start_limit(Unit *u) {
+        Path *p = PATH(u);
+        int r;
+
+        assert(p);
+
+        r = unit_test_start_limit(u);
+        if (r < 0) {
+                path_enter_dead(p, PATH_FAILURE_START_LIMIT_HIT);
+                return r;
+        }
+
+        return 0;
+}
+
 static const char* const path_type_table[_PATH_TYPE_MAX] = {
         [PATH_EXISTS] = "PathExists",
         [PATH_EXISTS_GLOB] = "PathExistsGlob",
@@ -841,4 +850,6 @@ const UnitVTable path_vtable = {
         .reset_failed = path_reset_failed,
 
         .bus_set_property = bus_path_set_property,
+
+        .test_start_limit = path_test_start_limit,
 };
index 89351c51c0782e734565fe091212622aa63f8f91..07443fec43438d4a751fbeb2f6ca5e37e0eec54e 100644 (file)
@@ -2441,13 +2441,6 @@ static int service_start(Unit *u) {
 
         assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED));
 
-        /* Make sure we don't enter a busy loop of some kind. */
-        r = unit_test_start_limit(u);
-        if (r < 0) {
-                service_enter_dead(s, SERVICE_FAILURE_START_LIMIT_HIT, false);
-                return r;
-        }
-
         r = unit_acquire_invocation_id(u);
         if (r < 0)
                 return r;
@@ -4448,6 +4441,22 @@ static const char *service_finished_job(Unit *u, JobType t, JobResult result) {
         return NULL;
 }
 
+static int service_test_start_limit(Unit *u) {
+        Service *s = SERVICE(u);
+        int r;
+
+        assert(s);
+
+        /* Make sure we don't enter a busy loop of some kind. */
+        r = unit_test_start_limit(u);
+        if (r < 0) {
+                service_enter_dead(s, SERVICE_FAILURE_START_LIMIT_HIT, false);
+                return r;
+        }
+
+        return 0;
+}
+
 static const char* const service_restart_table[_SERVICE_RESTART_MAX] = {
         [SERVICE_RESTART_NO] = "no",
         [SERVICE_RESTART_ON_SUCCESS] = "on-success",
@@ -4614,4 +4623,6 @@ const UnitVTable service_vtable = {
                 },
                 .finished_job = service_finished_job,
         },
+
+        .test_start_limit = service_test_start_limit,
 };
index 47092e15711f7d7587a22229f7d6bd489bea1423..5c2298ee5883f2fa83da2fe86e9afedbec63540f 100644 (file)
@@ -2517,12 +2517,6 @@ static int socket_start(Unit *u) {
 
         assert(IN_SET(s->state, SOCKET_DEAD, SOCKET_FAILED));
 
-        r = unit_test_start_limit(u);
-        if (r < 0) {
-                socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
-                return r;
-        }
-
         r = unit_acquire_invocation_id(u);
         if (r < 0)
                 return r;
@@ -3431,6 +3425,21 @@ static int socket_can_clean(Unit *u, ExecCleanMask *ret) {
         return exec_context_get_clean_mask(&s->exec_context, ret);
 }
 
+static int socket_test_start_limit(Unit *u) {
+        Socket *s = SOCKET(u);
+        int r;
+
+        assert(s);
+
+        r = unit_test_start_limit(u);
+        if (r < 0) {
+                socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
+                return r;
+        }
+
+        return 0;
+}
+
 static const char* const socket_exec_command_table[_SOCKET_EXEC_COMMAND_MAX] = {
         [SOCKET_EXEC_START_PRE] = "ExecStartPre",
         [SOCKET_EXEC_START_CHOWN] = "ExecStartChown",
@@ -3561,4 +3570,6 @@ const UnitVTable socket_vtable = {
                         [JOB_TIMEOUT]    = "Timed out stopping %s.",
                 },
         },
+
+        .test_start_limit = socket_test_start_limit,
 };
index 9edb929555e9ec5650b5bffb4824088e19e665c4..3cd959d22fbbcaa456996c98b8e6e295165b2153 100644 (file)
@@ -938,12 +938,6 @@ static int swap_start(Unit *u) {
                 if (UNIT(other)->job && UNIT(other)->job->state == JOB_RUNNING)
                         return -EAGAIN;
 
-        r = unit_test_start_limit(u);
-        if (r < 0) {
-                swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
-                return r;
-        }
-
         r = unit_acquire_invocation_id(u);
         if (r < 0)
                 return r;
@@ -1593,6 +1587,21 @@ static int swap_can_clean(Unit *u, ExecCleanMask *ret) {
         return exec_context_get_clean_mask(&s->exec_context, ret);
 }
 
+static int swap_test_start_limit(Unit *u) {
+        Swap *s = SWAP(u);
+        int r;
+
+        assert(s);
+
+        r = unit_test_start_limit(u);
+        if (r < 0) {
+                swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
+                return r;
+        }
+
+        return 0;
+}
+
 static const char* const swap_exec_command_table[_SWAP_EXEC_COMMAND_MAX] = {
         [SWAP_EXEC_ACTIVATE] = "ExecActivate",
         [SWAP_EXEC_DEACTIVATE] = "ExecDeactivate",
@@ -1688,4 +1697,6 @@ const UnitVTable swap_vtable = {
                         [JOB_TIMEOUT]    = "Timed out deactivating swap %s.",
                 },
         },
+
+        .test_start_limit = swap_test_start_limit,
 };
index b0637688154e14ac4186675d0e7be2151025de22..19573ef0326bccad747b4095b79318d9dc5bf1a5 100644 (file)
@@ -635,12 +635,6 @@ static int timer_start(Unit *u) {
         if (r < 0)
                 return r;
 
-        r = unit_test_start_limit(u);
-        if (r < 0) {
-                timer_enter_dead(t, TIMER_FAILURE_START_LIMIT_HIT);
-                return r;
-        }
-
         r = unit_acquire_invocation_id(u);
         if (r < 0)
                 return r;
@@ -901,6 +895,21 @@ static int timer_can_clean(Unit *u, ExecCleanMask *ret) {
         return 0;
 }
 
+static int timer_test_start_limit(Unit *u) {
+        Timer *t = TIMER(u);
+        int r;
+
+        assert(t);
+
+        r = unit_test_start_limit(u);
+        if (r < 0) {
+                timer_enter_dead(t, TIMER_FAILURE_START_LIMIT_HIT);
+                return r;
+        }
+
+        return 0;
+}
+
 static const char* const timer_base_table[_TIMER_BASE_MAX] = {
         [TIMER_ACTIVE] = "OnActiveSec",
         [TIMER_BOOT] = "OnBootSec",
@@ -960,4 +969,6 @@ const UnitVTable timer_vtable = {
         .timezone_change = timer_timezone_change,
 
         .bus_set_property = bus_timer_set_property,
+
+        .test_start_limit = timer_test_start_limit,
 };
index 780f78e1c5217d272b0df721a35d7948a3655c8e..f841c6dc81d7777ebc5cd861b827239dd1eb87fa 100644 (file)
@@ -1613,6 +1613,13 @@ int unit_start(Unit *u) {
 
         assert(u);
 
+        /* Check start rate limiting early so that failure conditions don't cause us to enter a busy loop. */
+        if (UNIT_VTABLE(u)->test_start_limit) {
+                int r = UNIT_VTABLE(u)->test_start_limit(u);
+                if (r < 0)
+                        return r;
+        }
+
         /* If this is already started, then this will succeed. Note that this will even succeed if this unit
          * is not startable by the user. This is relied on to detect when we need to wait for units and when
          * waiting is finished. */
index a34d260943445e3c218c3eda76c34a97631e0375..212a78e99d5052f5cafea9571201de7534c45c15 100644 (file)
@@ -618,6 +618,10 @@ typedef struct UnitVTable {
          * of this type will immediately fail. */
         bool (*supported)(void);
 
+        /* If this function is set, it's invoked first as part of starting a unit to allow start rate
+         * limiting checks to occur before we do anything else. */
+        int (*test_start_limit)(Unit *u);
+
         /* The strings to print in status messages */
         UnitStatusMessageFormats status_message_formats;
 
diff --git a/test/TEST-63-ISSUE-17433/Makefile b/test/TEST-63-ISSUE-17433/Makefile
new file mode 120000 (symlink)
index 0000000..e9f93b1
--- /dev/null
@@ -0,0 +1 @@
+../TEST-01-BASIC/Makefile
\ No newline at end of file
diff --git a/test/TEST-63-ISSUE-17433/test.sh b/test/TEST-63-ISSUE-17433/test.sh
new file mode 100755 (executable)
index 0000000..c595a9f
--- /dev/null
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -e
+
+TEST_DESCRIPTION="https://github.com/systemd/systemd/issues/17433"
+
+# shellcheck source=test/test-functions
+. "${TEST_BASE_DIR:?}/test-functions"
+
+do_test "$@"
index 180e6bb3f7ac0a990047eb73381b357863ab85d6..ff43d5e4e422cb997dacefec6db92d29dfbf8300 100644 (file)
@@ -33,6 +33,8 @@ if install_tests
                        install_dir : testdata_dir)
         install_subdir('testsuite-52.units',
                        install_dir : testdata_dir)
+        install_subdir('testsuite-63.units',
+                       install_dir : testdata_dir)
 
         testsuite08_dir = testdata_dir + '/testsuite-08.units'
         install_data('testsuite-08.units/-.mount',
index d0be786b019a1948d9367d81ebe0b362f551adc0..2fb476b986d82fa7b154083116736825877890c4 100644 (file)
@@ -1,6 +1,9 @@
 [Unit]
 Requires=test10.socket
 ConditionPathExistsGlob=/tmp/nonexistent
+# Make sure we hit the socket trigger limit in the test and not the service start limit.
+StartLimitInterval=1000
+StartLimitBurst=1000
 
 [Service]
 ExecStart=true
diff --git a/test/testsuite-63.units/test63.path b/test/testsuite-63.units/test63.path
new file mode 100644 (file)
index 0000000..a6573bd
--- /dev/null
@@ -0,0 +1,2 @@
+[Path]
+PathExists=/tmp/test63
diff --git a/test/testsuite-63.units/test63.service b/test/testsuite-63.units/test63.service
new file mode 100644 (file)
index 0000000..c838018
--- /dev/null
@@ -0,0 +1,5 @@
+[Unit]
+ConditionPathExists=!/tmp/nonexistent
+
+[Service]
+ExecStart=true
diff --git a/test/units/testsuite-63.service b/test/units/testsuite-63.service
new file mode 100644 (file)
index 0000000..0412272
--- /dev/null
@@ -0,0 +1,16 @@
+[Unit]
+Description=TEST-63-ISSUE-17433
+
+[Service]
+ExecStartPre=rm -f /failed /testok
+Type=oneshot
+ExecStart=rm -f /tmp/nonexistent
+ExecStart=systemctl start test63.path
+ExecStart=touch /tmp/test63
+# Make sure systemd has sufficient time to hit the start limit for test63.service.
+ExecStart=sleep 2
+ExecStart=sh -x -c 'test "$(systemctl show test63.service -P ActiveState)" = failed'
+ExecStart=sh -x -c 'test "$(systemctl show test63.service -P Result)" = start-limit-hit'
+ExecStart=sh -x -c 'test "$(systemctl show test63.path -P ActiveState)" = failed'
+ExecStart=sh -x -c 'test "$(systemctl show test63.path -P Result)" = unit-start-limit-hit'
+ExecStart=sh -x -c 'echo OK >/testok'