llkd: enhance list properties
Because of the limited length of properties, and to ease the complexity of product and vendor adjustments, the comma separated list properties will use a leading comma to preserve the defaults and add or subtract entries with + and - prefixes respectively. Without the leading comma, the list is explicitly specified as before. Cleanup: - use empty() instead of space() == 0 (or converse if != 0) - if (unlikely) pprocp can not be allocated, to a to_string(ppid) check For testing, observe before and after llkd_unit_test below to confirm leading comma effects for example: livelock: ro.llk.stack=wait_on_page_bit_killable,bit_wait_io,\ __get_user_pages,cma_alloc livelock: ro.llk.stack=...,SyS_openat,... Test: llkd_unit_test Bug: 120983740 Change-Id: Ia3d164c2fdac5295a474c6c1294a34e4ae9d0b61
This commit is contained in:
parent
b658ffa2f3
commit
8a5f081763
|
@ -86,7 +86,13 @@ kernel instead of deal with more graceful kill operation.
|
|||
Android Properties
|
||||
------------------
|
||||
|
||||
Android Properties llkd respond to (*prop*_ms parms are in milliseconds):
|
||||
The following are the Android Properties llkd respond to.
|
||||
*prop*_ms named properties are in milliseconds.
|
||||
Properties that use comma (*,*) separator for lists, use a leading separator to
|
||||
preserve default and add or subtract entries with (*optional*) plus (*+*) and
|
||||
minus (*-*) prefixes respectively.
|
||||
For these lists, the string "*false*" is synonymous with an *empty* list,
|
||||
and *blank* or *missing* resorts to the specified *default* value.
|
||||
|
||||
#### ro.config.low_ram
|
||||
device is configured with limited memory.
|
||||
|
@ -137,7 +143,6 @@ default 2 minutes samples of threads for D or Z.
|
|||
#### ro.llk.stack
|
||||
default cma_alloc,__get_user_pages,bit_wait_io,wait_on_page_bit_killable
|
||||
comma separated list of kernel symbols.
|
||||
The string "*false*" is the equivalent to an *empty* list.
|
||||
Look for kernel stack symbols that if ever persistently present can
|
||||
indicate a subsystem is locked up.
|
||||
Beware, check does not on purpose do forward scheduling ABA except by polling
|
||||
|
@ -154,7 +159,6 @@ concerns on user builds prevents this checking.
|
|||
default 0,1,2 (kernel, init and [kthreadd]) plus process names
|
||||
init,[kthreadd],[khungtaskd],lmkd,llkd,watchdogd,
|
||||
[watchdogd],[watchdogd/0],...,[watchdogd/***get_nprocs**-1*].
|
||||
The string "*false*" is the equivalent to an *empty* list.
|
||||
Do not watch these processes. A process can be comm, cmdline or pid reference.
|
||||
NB: automated default here can be larger than the current maximum property
|
||||
size of 92.
|
||||
|
@ -162,18 +166,15 @@ NB: false is a very very very unlikely process to want to blacklist.
|
|||
|
||||
#### ro.llk.blacklist.parent
|
||||
default 0,2,adbd (kernel, [kthreadd] and adbd).
|
||||
The string "*false*" is the equivalent to an *empty* list.
|
||||
Do not watch processes that have this parent.
|
||||
A parent process can be comm, cmdline or pid reference.
|
||||
|
||||
#### ro.llk.blacklist.uid
|
||||
default *empty* or false, comma separated list of uid numbers or names.
|
||||
The string "*false*" is the equivalent to an *empty* list.
|
||||
Do not watch processes that match this uid.
|
||||
|
||||
#### ro.llk.blacklist.process.stack
|
||||
default process names init,lmkd.llkd,llkd,keystore,ueventd,apexd,logd.
|
||||
The string "*false*" is the equivalent to an *empty* list.
|
||||
This subset of processes are not monitored for live lock stack signatures.
|
||||
Also prevents the sepolicy violation associated with processes that block
|
||||
ptrace, as these can not be checked anyways.
|
||||
|
|
108
llkd/libllkd.cpp
108
llkd/libllkd.cpp
|
@ -24,6 +24,7 @@
|
|||
#include <pwd.h> // getpwuid()
|
||||
#include <signal.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/cdefs.h> // ___STRING, __predict_true() and _predict_false()
|
||||
#include <sys/mman.h> // mlockall()
|
||||
#include <sys/prctl.h>
|
||||
|
@ -617,17 +618,24 @@ std::string llkFormat(bool flag) {
|
|||
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
|
||||
std::string ret;
|
||||
for (const auto& entry : blacklist) {
|
||||
if (ret.size()) {
|
||||
ret += ",";
|
||||
}
|
||||
if (!ret.empty()) ret += ",";
|
||||
ret += entry;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// This function parses the properties as a list, incorporating the supplied
|
||||
// default. A leading comma separator means preserve the defaults and add
|
||||
// entries (with an optional leading + sign), or removes entries with a leading
|
||||
// - sign.
|
||||
//
|
||||
// We only officially support comma separators, but wetware being what they
|
||||
// are will take some liberty and I do not believe they should be punished.
|
||||
std::unordered_set<std::string> llkSplit(const std::string& s) {
|
||||
std::unordered_set<std::string> llkSplit(const std::string& prop, const std::string& def) {
|
||||
auto s = android::base::GetProperty(prop, def);
|
||||
constexpr char separators[] = ", \t:;";
|
||||
if (!s.empty() && (s != def) && strchr(separators, s[0])) s = def + s;
|
||||
|
||||
std::unordered_set<std::string> result;
|
||||
|
||||
// Special case, allow boolean false to empty the list, otherwise expected
|
||||
|
@ -637,9 +645,29 @@ std::unordered_set<std::string> llkSplit(const std::string& s) {
|
|||
|
||||
size_t base = 0;
|
||||
while (s.size() > base) {
|
||||
auto found = s.find_first_of(", \t:", base);
|
||||
// Only emplace content, empty entries are not an option
|
||||
if (found != base) result.emplace(s.substr(base, found - base));
|
||||
auto found = s.find_first_of(separators, base);
|
||||
// Only emplace unique content, empty entries are not an option
|
||||
if (found != base) {
|
||||
switch (s[base]) {
|
||||
case '-':
|
||||
++base;
|
||||
if (base >= s.size()) break;
|
||||
if (base != found) {
|
||||
auto have = result.find(s.substr(base, found - base));
|
||||
if (have != result.end()) result.erase(have);
|
||||
}
|
||||
break;
|
||||
case '+':
|
||||
++base;
|
||||
if (base >= s.size()) break;
|
||||
if (base == found) break;
|
||||
// FALLTHRU (for gcc, lint, pcc, etc; following for clang)
|
||||
FALLTHROUGH_INTENDED;
|
||||
default:
|
||||
result.emplace(s.substr(base, found - base));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found == s.npos) break;
|
||||
base = found + 1;
|
||||
}
|
||||
|
@ -648,13 +676,21 @@ std::unordered_set<std::string> llkSplit(const std::string& s) {
|
|||
|
||||
bool llkSkipName(const std::string& name,
|
||||
const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
|
||||
if ((name.size() == 0) || (blacklist.size() == 0)) {
|
||||
return false;
|
||||
}
|
||||
if (name.empty() || blacklist.empty()) return false;
|
||||
|
||||
return blacklist.find(name) != blacklist.end();
|
||||
}
|
||||
|
||||
bool llkSkipProc(proc* procp,
|
||||
const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
|
||||
if (!procp) return false;
|
||||
if (llkSkipName(std::to_string(procp->pid), blacklist)) return true;
|
||||
if (llkSkipName(procp->getComm(), blacklist)) return true;
|
||||
if (llkSkipName(procp->getCmdline(), blacklist)) return true;
|
||||
if (llkSkipName(android::base::Basename(procp->getCmdline()), blacklist)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llkSkipPid(pid_t pid) {
|
||||
return llkSkipName(std::to_string(pid), llkBlacklistProcess);
|
||||
}
|
||||
|
@ -730,11 +766,7 @@ bool llkCheckStack(proc* procp, const std::string& piddir) {
|
|||
}
|
||||
|
||||
// Don't check process that are known to block ptrace, save sepolicy noise.
|
||||
if (llkSkipName(std::to_string(procp->pid), llkBlacklistStack)) return false;
|
||||
if (llkSkipName(procp->getComm(), llkBlacklistStack)) return false;
|
||||
if (llkSkipName(procp->getCmdline(), llkBlacklistStack)) return false;
|
||||
if (llkSkipName(android::base::Basename(procp->getCmdline()), llkBlacklistStack)) return false;
|
||||
|
||||
if (llkSkipProc(procp, llkBlacklistStack)) return false;
|
||||
auto kernel_stack = ReadFile(piddir + "/stack");
|
||||
if (kernel_stack.empty()) {
|
||||
LOG(VERBOSE) << piddir << "/stack empty comm=" << procp->getComm()
|
||||
|
@ -780,12 +812,12 @@ void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
|
|||
// but if there are problems we assume at least a few
|
||||
// samples of reads occur before we take any real action.
|
||||
std::string schedString = ReadFile(piddir + "/sched");
|
||||
if (schedString.size() == 0) {
|
||||
if (schedString.empty()) {
|
||||
// /schedstat is not as standardized, but in 3.1+
|
||||
// Android devices, the third field is nr_switches
|
||||
// from /sched:
|
||||
schedString = ReadFile(piddir + "/schedstat");
|
||||
if (schedString.size() == 0) {
|
||||
if (schedString.empty()) {
|
||||
return;
|
||||
}
|
||||
auto val = static_cast<unsigned long long>(-1);
|
||||
|
@ -943,7 +975,7 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
|
||||
// Get the process stat
|
||||
std::string stat = ReadFile(piddir + "/stat");
|
||||
if (stat.size() == 0) {
|
||||
if (stat.empty()) {
|
||||
continue;
|
||||
}
|
||||
unsigned tid = -1;
|
||||
|
@ -1032,11 +1064,10 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
if (pprocp == nullptr) {
|
||||
pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
|
||||
}
|
||||
if ((pprocp != nullptr) &&
|
||||
(llkSkipName(pprocp->getComm(), llkBlacklistParent) ||
|
||||
llkSkipName(pprocp->getCmdline(), llkBlacklistParent) ||
|
||||
llkSkipName(android::base::Basename(pprocp->getCmdline()), llkBlacklistParent))) {
|
||||
break;
|
||||
if (pprocp) {
|
||||
if (llkSkipProc(pprocp, llkBlacklistParent)) break;
|
||||
} else {
|
||||
if (llkSkipName(std::to_string(ppid), llkBlacklistParent)) break;
|
||||
}
|
||||
|
||||
if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
|
||||
|
@ -1135,21 +1166,15 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
if (!p->second.updated) {
|
||||
IF_ALOG(LOG_VERBOSE, LOG_TAG) {
|
||||
std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
|
||||
if (ppidCmdline.size()) {
|
||||
ppidCmdline = "(" + ppidCmdline + ")";
|
||||
}
|
||||
if (!ppidCmdline.empty()) ppidCmdline = "(" + ppidCmdline + ")";
|
||||
std::string pidCmdline;
|
||||
if (p->second.pid != p->second.tid) {
|
||||
pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
|
||||
if (pidCmdline.size()) {
|
||||
pidCmdline = "(" + pidCmdline + ")";
|
||||
}
|
||||
if (!pidCmdline.empty()) pidCmdline = "(" + pidCmdline + ")";
|
||||
}
|
||||
std::string tidCmdline =
|
||||
llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
|
||||
if (tidCmdline.size()) {
|
||||
tidCmdline = "(" + tidCmdline + ")";
|
||||
}
|
||||
if (!tidCmdline.empty()) tidCmdline = "(" + tidCmdline + ")";
|
||||
LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
|
||||
<< pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
|
||||
}
|
||||
|
@ -1226,13 +1251,11 @@ bool llkInit(const char* threadname) {
|
|||
llkValidate(); // validate all (effectively minus llkTimeoutMs)
|
||||
#ifdef __PTRACE_ENABLED__
|
||||
if (debuggable) {
|
||||
llkCheckStackSymbols = llkSplit(
|
||||
android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT));
|
||||
llkCheckStackSymbols = llkSplit(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT);
|
||||
}
|
||||
std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
|
||||
if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
|
||||
llkBlacklistStack = llkSplit(
|
||||
android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack));
|
||||
llkBlacklistStack = llkSplit(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack);
|
||||
#endif
|
||||
std::string defaultBlacklistProcess(
|
||||
std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
|
||||
|
@ -1244,17 +1267,14 @@ bool llkInit(const char* threadname) {
|
|||
for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
|
||||
defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
|
||||
}
|
||||
defaultBlacklistProcess =
|
||||
android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
|
||||
llkBlacklistProcess = llkSplit(defaultBlacklistProcess);
|
||||
llkBlacklistProcess = llkSplit(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
|
||||
if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
|
||||
llkBlacklistProcess.emplace("[khungtaskd]");
|
||||
}
|
||||
llkBlacklistParent = llkSplit(android::base::GetProperty(
|
||||
LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
|
||||
"," LLK_BLACKLIST_PARENT_DEFAULT));
|
||||
llkBlacklistUid =
|
||||
llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT));
|
||||
llkBlacklistParent = llkSplit(LLK_BLACKLIST_PARENT_PROPERTY,
|
||||
std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
|
||||
"," LLK_BLACKLIST_PARENT_DEFAULT);
|
||||
llkBlacklistUid = llkSplit(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT);
|
||||
|
||||
// internal watchdog
|
||||
::signal(SIGALRM, llkAlarmHandler);
|
||||
|
|
|
@ -87,7 +87,8 @@ seconds llkdSleepPeriod(char state) {
|
|||
execute("stop llkd-1");
|
||||
rest();
|
||||
std::string setprop("setprop ");
|
||||
execute((setprop + LLK_CHECK_STACK_PROPERTY + " SyS_openat").c_str());
|
||||
// Manually check that SyS_openat is _added_ to the list when restarted
|
||||
execute((setprop + LLK_CHECK_STACK_PROPERTY + " ,SyS_openat").c_str());
|
||||
rest();
|
||||
execute((setprop + LLK_ENABLE_WRITEABLE_PROPERTY + " false").c_str());
|
||||
rest();
|
||||
|
|
Loading…
Reference in New Issue