// Patch summary (commentary only; text before the first "diff --git" header is not part of any hunk).
//
// Purpose: add diagnostics around init's signalfd/epoll handling and around exec services.
//
// init/init.cpp
//   * Adds kDiagnosticTimeout, a std::chrono::milliseconds constant initialised from 10s.
//   * HandleSignalFd(): the single read() of signal_fd becomes a loop.
//       - read() fails with EAGAIN: sleep 100ms and retry. Each time kDiagnosticTimeout has
//         elapsed since `started`, log "epoll() woke us up, ..." and reset `started` to now.
//       - any other result that is not exactly sizeof(siginfo): PLOG the failure and return
//         (same behaviour as before the patch).
//       - a full read: break out of the loop and fall through to the switch on ssi_signo.
//   * InstallSignalFdHandler(): signalfd() is now created with SFD_CLOEXEC | SFD_NONBLOCK,
//     which is what makes the EAGAIN branch above reachable.
//   * SecondStageMain(): epoll_timeout now starts as kDiagnosticTimeout instead of an empty
//     optional. A new else-if branch after the pending_functions loop logs
//     "Exec service is hung? ..." when Service::is_exec_service_running() is true and at least
//     kDiagnosticTimeout has passed since Service::exec_service_started().
//
// init/service.cpp
//   * Defines the static member Service::exec_service_started_.
//   * Service::ExecStart() sets it to std::chrono::steady_clock::now() right after setting
//     is_exec_service_running_ = true.
//
// init/service.h
//   * Adds the static accessor exec_service_started() returning exec_service_started_.
//   * Declares static members exec_service_started_ and exec_service_pid_.
//
// NOTE(review): exec_service_pid_ is declared in service.h, but no definition or use is visible
//   in this patch -- confirm whether it is intentional or left over.
// NOTE(review): the "Waited ... without SIGCHLD" message streams waited.count() with no unit;
//   the duration's template arguments are not visible here, so the unit cannot be confirmed.
// NOTE(review): the context comment "By default, sleep until something happens." now precedes a
//   timeout initialised to kDiagnosticTimeout rather than an empty optional -- comment may be stale.
// NOTE(review): the EAGAIN loop in HandleSignalFd() has no exit other than a successful or failed
//   read; presumably intentional for diagnostics -- confirm this blocking is acceptable.
// NOTE(review): template argument lists appear to be missing from this text (e.g.
//   "std::optional{}", "std::vector& args", "std::chrono::time_point Service::...") and the
//   hunks are collapsed onto single lines -- verify against the original commit before applying.
diff --git a/init/init.cpp b/init/init.cpp index eca7bc564..5a0b3a6c2 100644 --- a/init/init.cpp +++ b/init/init.cpp @@ -578,12 +578,29 @@ static void HandleSigtermSignal(const signalfd_siginfo& siginfo) { HandlePowerctlMessage("shutdown,container"); } +static constexpr std::chrono::milliseconds kDiagnosticTimeout = 10s; + static void HandleSignalFd() { signalfd_siginfo siginfo; - ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo))); - if (bytes_read != sizeof(siginfo)) { - PLOG(ERROR) << "Failed to read siginfo from signal_fd"; - return; + auto started = std::chrono::steady_clock::now(); + for (;;) { + ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo))); + if (bytes_read < 0 && errno == EAGAIN) { + auto now = std::chrono::steady_clock::now(); + std::chrono::duration waited = now - started; + if (waited >= kDiagnosticTimeout) { + LOG(ERROR) << "epoll() woke us up, but we waited with no SIGCHLD!"; + started = now; + } + + std::this_thread::sleep_for(100ms); + continue; + } + if (bytes_read != sizeof(siginfo)) { + PLOG(ERROR) << "Failed to read siginfo from signal_fd"; + return; + } + break; } switch (siginfo.ssi_signo) { @@ -639,7 +656,7 @@ static void InstallSignalFdHandler(Epoll* epoll) { LOG(FATAL) << "Failed to register a fork handler: " << strerror(result); } - signal_fd = signalfd(-1, &mask, SFD_CLOEXEC); + signal_fd = signalfd(-1, &mask, SFD_CLOEXEC | SFD_NONBLOCK); if (signal_fd == -1) { PLOG(FATAL) << "failed to create signalfd"; } @@ -938,7 +955,7 @@ int SecondStageMain(int argc, char** argv) { setpriority(PRIO_PROCESS, 0, 0); while (true) { // By default, sleep until something happens. 
- auto epoll_timeout = std::optional{}; + auto epoll_timeout = std::optional{kDiagnosticTimeout}; auto shutdown_command = shutdown_state.CheckShutdown(); if (shutdown_command) { @@ -978,6 +995,13 @@ int SecondStageMain(int argc, char** argv) { for (const auto& function : *pending_functions) { (*function)(); } + } else if (Service::is_exec_service_running()) { + std::chrono::duration waited = + std::chrono::steady_clock::now() - Service::exec_service_started(); + if (waited >= kDiagnosticTimeout) { + LOG(ERROR) << "Exec service is hung? Waited " << waited.count() + << " without SIGCHLD"; + } } if (!IsShuttingDown()) { HandleControlMessages(); diff --git a/init/service.cpp b/init/service.cpp index 8a9cc0a10..2ebf87eb1 100644 --- a/init/service.cpp +++ b/init/service.cpp @@ -127,6 +127,7 @@ static bool ExpandArgsAndExecv(const std::vector& args, bool sigsto unsigned long Service::next_start_order_ = 1; bool Service::is_exec_service_running_ = false; +std::chrono::time_point Service::exec_service_started_; Service::Service(const std::string& name, Subcontext* subcontext_for_restart_commands, const std::vector& args, bool from_apex) @@ -388,6 +389,7 @@ Result Service::ExecStart() { flags_ |= SVC_EXEC; is_exec_service_running_ = true; + exec_service_started_ = std::chrono::steady_clock::now(); LOG(INFO) << "SVC_EXEC service '" << name_ << "' pid " << pid_ << " (uid " << proc_attr_.uid << " gid " << proc_attr_.gid << "+" << proc_attr_.supp_gids.size() << " context " diff --git a/init/service.h b/init/service.h index 3f12aa22f..d233cbf43 100644 --- a/init/service.h +++ b/init/service.h @@ -102,6 +102,9 @@ class Service { size_t CheckAllCommands() const { return onrestart_.CheckAllCommands(); } static bool is_exec_service_running() { return is_exec_service_running_; } + static std::chrono::time_point exec_service_started() { + return exec_service_started_; + } const std::string& name() const { return name_; } const std::set& classnames() const { return classnames_; } @@ 
-154,6 +157,8 @@ class Service { static unsigned long next_start_order_; static bool is_exec_service_running_; + static std::chrono::time_point exec_service_started_; + static pid_t exec_service_pid_; std::string name_; std::set classnames_;