Some PIDs can remain in the watched list even though their processes have
exited since a long time. It can easily happen if the main process of a forking
service manages to spawn a child before the control process exits for example.
However when a pid is about to be mapped to a unit by calling unit_watch_pid(),
the caller usually knows if the pid should belong to this unit exclusively: if
we just forked() off a child, then we can be sure that its PID is otherwise
unused. In this case we take this opportunity to remove any stalled PIDs from
the watched process list.
If we learnt about a PID in any other form (for example via PID file, via
searching, MAINPID= and so on), then we can't assume anything.
Thanks Renaud Métrich for backporting this to RHEL.
Resolves: #
1744972
pid_t pid;
while ((r = cg_read_pid(f, &pid)) > 0) {
- r = unit_watch_pid(u, pid);
+ r = unit_watch_pid(u, pid, false);
if (r < 0 && ret >= 0)
ret = r;
}
return r;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
- r = unit_watch_pid(UNIT(s), pid);
+ r = unit_watch_pid(UNIT(s), pid, false);
if (r < 0 && r != -EEXIST)
return r;
}
job_finish_and_invalidate(j, JOB_CANCELED, false, false);
}
+void manager_unwatch_pid(Manager *m, pid_t pid) {
+ assert(m);
+
+ /* First let's drop the unit keyed as "pid". */
+ (void) hashmap_remove(m->watch_pids, PID_TO_PTR(pid));
+
+ /* Then, let's also drop the array keyed by -pid. */
+ free(hashmap_remove(m->watch_pids, PID_TO_PTR(-pid)));
+}
+
static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
Manager *m = userdata;
Job *j;
void manager_clear_jobs(Manager *m);
+void manager_unwatch_pid(Manager *m, pid_t pid);
+
unsigned manager_dispatch_load_queue(Manager *m);
int manager_environment_add(Manager *m, char **minus, char **plus);
pid_is_unwaited(m->control_pid) &&
MOUNT_STATE_WITH_PROCESS(new_state)) {
- r = unit_watch_pid(UNIT(m), m->control_pid);
+ r = unit_watch_pid(UNIT(m), m->control_pid, false);
if (r < 0)
return r;
if (r < 0)
return r;
- r = unit_watch_pid(UNIT(m), pid);
+ r = unit_watch_pid(UNIT(m), pid, true);
if (r < 0)
- /* FIXME: we need to do something here */
return r;
*_pid = pid;
if (r < 0)
return r;
- r = unit_watch_pid(UNIT(s), pid);
+ r = unit_watch_pid(UNIT(s), pid, false);
if (r < 0) /* FIXME: we need to do something here */
return log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" for service: %m", pid);
if (service_set_main_pid(s, pid) < 0)
return;
- r = unit_watch_pid(UNIT(s), pid);
+ r = unit_watch_pid(UNIT(s), pid, false);
if (r < 0)
/* FIXME: we need to do something here */
log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" from: %m", pid);
SERVICE_RUNNING, SERVICE_RELOAD,
SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
- r = unit_watch_pid(UNIT(s), s->main_pid);
+ r = unit_watch_pid(UNIT(s), s->main_pid, false);
if (r < 0)
return r;
}
SERVICE_RELOAD,
SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
- r = unit_watch_pid(UNIT(s), s->control_pid);
+ r = unit_watch_pid(UNIT(s), s->control_pid, false);
if (r < 0)
return r;
}
s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
s->exec_fd_hot = false;
- r = unit_watch_pid(UNIT(s), pid);
- if (r < 0) /* FIXME: we need to do something here */
+ r = unit_watch_pid(UNIT(s), pid, true);
+ if (r < 0)
return r;
*_pid = pid;
}
if (r > 0) {
service_set_main_pid(s, new_main_pid);
- unit_watch_pid(UNIT(s), new_main_pid);
+ unit_watch_pid(UNIT(s), new_main_pid, false);
notify_dbus = true;
}
}
log_unit_debug(u, "D-Bus name %s is now owned by process " PID_FMT, name, pid);
service_set_main_pid(s, pid);
- unit_watch_pid(UNIT(s), pid);
+ unit_watch_pid(UNIT(s), pid, false);
}
}
}
SOCKET_FINAL_SIGTERM,
SOCKET_FINAL_SIGKILL)) {
- r = unit_watch_pid(UNIT(s), s->control_pid);
+ r = unit_watch_pid(UNIT(s), s->control_pid, false);
if (r < 0)
return r;
if (r < 0)
return r;
- r = unit_watch_pid(UNIT(s), pid);
+ r = unit_watch_pid(UNIT(s), pid, true);
if (r < 0)
- /* FIXME: we need to do something here */
return r;
*_pid = pid;
_exit(EXIT_SUCCESS);
}
- r = unit_watch_pid(UNIT(s), pid);
+ r = unit_watch_pid(UNIT(s), pid, true);
if (r < 0)
goto fail;
pid_is_unwaited(s->control_pid) &&
SWAP_STATE_WITH_PROCESS(new_state)) {
- r = unit_watch_pid(UNIT(s), s->control_pid);
+ r = unit_watch_pid(UNIT(s), s->control_pid, false);
if (r < 0)
return r;
if (r < 0)
goto fail;
- r = unit_watch_pid(UNIT(s), pid);
+ r = unit_watch_pid(UNIT(s), pid, true);
if (r < 0)
- /* FIXME: we need to do something here */
goto fail;
*_pid = pid;
unit_add_to_gc_queue(u);
}
-int unit_watch_pid(Unit *u, pid_t pid) {
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive) {
int r;
assert(u);
/* Watch a specific PID */
+ /* Caller might be sure that this PID belongs to this unit only. Let's take this
+ * opportunity to remove any stalled references to this PID as they can be created
+ * easily (when watching a process which is not our direct child). */
+ if (exclusive)
+ manager_unwatch_pid(u->manager, pid);
+
r = set_ensure_allocated(&u->pids, NULL);
if (r < 0)
return r;
void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags);
-int unit_watch_pid(Unit *u, pid_t pid);
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive);
void unit_unwatch_pid(Unit *u, pid_t pid);
void unit_unwatch_all_pids(Unit *u);
assert_se(hashmap_isempty(m->watch_pids));
assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
- assert_se(unit_watch_pid(a, 4711) >= 0);
+ assert_se(unit_watch_pid(a, 4711, false) >= 0);
assert_se(manager_get_unit_by_pid(m, 4711) == a);
- assert_se(unit_watch_pid(a, 4711) >= 0);
+ assert_se(unit_watch_pid(a, 4711, false) >= 0);
assert_se(manager_get_unit_by_pid(m, 4711) == a);
- assert_se(unit_watch_pid(b, 4711) >= 0);
+ assert_se(unit_watch_pid(b, 4711, false) >= 0);
u = manager_get_unit_by_pid(m, 4711);
assert_se(u == a || u == b);
- assert_se(unit_watch_pid(b, 4711) >= 0);
+ assert_se(unit_watch_pid(b, 4711, false) >= 0);
u = manager_get_unit_by_pid(m, 4711);
assert_se(u == a || u == b);
- assert_se(unit_watch_pid(c, 4711) >= 0);
+ assert_se(unit_watch_pid(c, 4711, false) >= 0);
u = manager_get_unit_by_pid(m, 4711);
assert_se(u == a || u == b || u == c);
- assert_se(unit_watch_pid(c, 4711) >= 0);
+ assert_se(unit_watch_pid(c, 4711, false) >= 0);
u = manager_get_unit_by_pid(m, 4711);
assert_se(u == a || u == b || u == c);