llkd: enhance list properties

Because of the limited length of properties, and to ease the
complexity of product and vendor adjustments, the comma separated
list properties will use a leading comma to preserve the defaults
and add or subtract entries with + and - prefixes respectively.

Without the leading comma, the list is explicitly specified as before.

Cleanup:
- use empty() instead of size() == 0 (or converse if != 0)
- if (unlikely) pprocp can not be allocated, do a to_string(ppid) check

For testing, observe before and after llkd_unit_test below to
confirm leading comma effects for example:

livelock: ro.llk.stack=wait_on_page_bit_killable,bit_wait_io,\
                       __get_user_pages,cma_alloc
livelock: ro.llk.stack=...,SyS_openat,...

Test: llkd_unit_test
Bug: 120983740
Change-Id: Ia3d164c2fdac5295a474c6c1294a34e4ae9d0b61
This commit is contained in:
Mark Salyzyn 2019-01-03 08:39:38 -08:00
parent b658ffa2f3
commit 8a5f081763
3 changed files with 73 additions and 51 deletions

View File

@ -86,7 +86,13 @@ kernel instead of deal with more graceful kill operation.
Android Properties
------------------
Android Properties llkd respond to (*prop*_ms parms are in milliseconds):
The following are the Android Properties llkd responds to.
*prop*_ms named properties are in milliseconds.
Properties that use comma (*,*) separator for lists, use a leading separator to
preserve default and add or subtract entries with (*optional*) plus (*+*) and
minus (*-*) prefixes respectively.
For these lists, the string "*false*" is synonymous with an *empty* list,
and *blank* or *missing* resorts to the specified *default* value.
#### ro.config.low_ram
device is configured with limited memory.
@ -137,7 +143,6 @@ default 2 minutes samples of threads for D or Z.
#### ro.llk.stack
default cma_alloc,__get_user_pages,bit_wait_io,wait_on_page_bit_killable
comma separated list of kernel symbols.
The string "*false*" is the equivalent to an *empty* list.
Look for kernel stack symbols that if ever persistently present can
indicate a subsystem is locked up.
Beware, check does not on purpose do forward scheduling ABA except by polling
@ -154,7 +159,6 @@ concerns on user builds prevents this checking.
default 0,1,2 (kernel, init and [kthreadd]) plus process names
init,[kthreadd],[khungtaskd],lmkd,llkd,watchdogd,
[watchdogd],[watchdogd/0],...,[watchdogd/***get_nprocs**-1*].
The string "*false*" is the equivalent to an *empty* list.
Do not watch these processes. A process can be comm, cmdline or pid reference.
NB: automated default here can be larger than the current maximum property
size of 92.
@ -162,18 +166,15 @@ NB: false is a very very very unlikely process to want to blacklist.
#### ro.llk.blacklist.parent
default 0,2,adbd (kernel, [kthreadd] and adbd).
The string "*false*" is the equivalent to an *empty* list.
Do not watch processes that have this parent.
A parent process can be comm, cmdline or pid reference.
#### ro.llk.blacklist.uid
default *empty* or false, comma separated list of uid numbers or names.
The string "*false*" is the equivalent to an *empty* list.
Do not watch processes that match this uid.
#### ro.llk.blacklist.process.stack
default process names init,lmkd.llkd,llkd,keystore,ueventd,apexd,logd.
The string "*false*" is the equivalent to an *empty* list.
This subset of processes are not monitored for live lock stack signatures.
Also prevents the sepolicy violation associated with processes that block
ptrace, as these can not be checked anyways.

View File

@ -24,6 +24,7 @@
#include <pwd.h> // getpwuid()
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <sys/cdefs.h> // ___STRING, __predict_true() and _predict_false()
#include <sys/mman.h> // mlockall()
#include <sys/prctl.h>
@ -617,17 +618,24 @@ std::string llkFormat(bool flag) {
// Join the entries of a blacklist set into one comma separated string.
//
// Returns an empty string for an empty set.  Entries appear in the set's
// iteration order, which std::unordered_set leaves unspecified.
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
    std::string ret;
    for (const auto& entry : blacklist) {
        // Exactly one separator between entries, and none ahead of the first.
        if (!ret.empty()) ret += ",";
        ret += entry;
    }
    return ret;
}
// This function parses the properties as a list, incorporating the supplied
// default. A leading comma separator means preserve the defaults and add
// entries (with an optional leading + sign), or removes entries with a leading
// - sign.
//
// We only officially support comma separators, but wetware being what they
// are will take some liberty and I do not believe they should be punished.
std::unordered_set<std::string> llkSplit(const std::string& s) {
std::unordered_set<std::string> llkSplit(const std::string& prop, const std::string& def) {
auto s = android::base::GetProperty(prop, def);
constexpr char separators[] = ", \t:;";
if (!s.empty() && (s != def) && strchr(separators, s[0])) s = def + s;
std::unordered_set<std::string> result;
// Special case, allow boolean false to empty the list, otherwise expected
@ -637,9 +645,29 @@ std::unordered_set<std::string> llkSplit(const std::string& s) {
size_t base = 0;
while (s.size() > base) {
auto found = s.find_first_of(", \t:", base);
// Only emplace content, empty entries are not an option
if (found != base) result.emplace(s.substr(base, found - base));
auto found = s.find_first_of(separators, base);
// Only emplace unique content, empty entries are not an option
if (found != base) {
switch (s[base]) {
case '-':
++base;
if (base >= s.size()) break;
if (base != found) {
auto have = result.find(s.substr(base, found - base));
if (have != result.end()) result.erase(have);
}
break;
case '+':
++base;
if (base >= s.size()) break;
if (base == found) break;
// FALLTHRU (for gcc, lint, pcc, etc; following for clang)
FALLTHROUGH_INTENDED;
default:
result.emplace(s.substr(base, found - base));
break;
}
}
if (found == s.npos) break;
base = found + 1;
}
@ -648,13 +676,21 @@ std::unordered_set<std::string> llkSplit(const std::string& s) {
// Report whether name is an exact member of blacklist.
//
// An empty name or an empty blacklist never matches.  When the caller does
// not supply a blacklist, llkBlacklistProcess is consulted.
bool llkSkipName(const std::string& name,
                 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
    // Single emptiness guard; nothing can match in either case.
    if (name.empty() || blacklist.empty()) return false;
    return blacklist.find(name) != blacklist.end();
}
// Decide whether a process is covered by blacklist.
//
// A null procp never matches.  Otherwise the process matches as soon as
// llkSkipName() accepts one of its identities, tried in this order: the pid
// rendered by std::to_string(), the comm, the cmdline, and the Basename of
// the cmdline.  When the caller does not supply a blacklist,
// llkBlacklistProcess is consulted.
bool llkSkipProc(proc* procp,
                 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
    if (procp == nullptr) return false;
    // Short-circuit evaluation keeps the lookups in the documented order and
    // stops at the first hit.
    return llkSkipName(std::to_string(procp->pid), blacklist) ||
           llkSkipName(procp->getComm(), blacklist) ||
           llkSkipName(procp->getCmdline(), blacklist) ||
           llkSkipName(android::base::Basename(procp->getCmdline()), blacklist);
}
// Check a bare pid, rendered by std::to_string(), against llkBlacklistProcess.
bool llkSkipPid(pid_t pid) {
    const std::string pidName = std::to_string(pid);
    return llkSkipName(pidName, llkBlacklistProcess);
}
@ -730,11 +766,7 @@ bool llkCheckStack(proc* procp, const std::string& piddir) {
}
// Don't check process that are known to block ptrace, save sepolicy noise.
if (llkSkipName(std::to_string(procp->pid), llkBlacklistStack)) return false;
if (llkSkipName(procp->getComm(), llkBlacklistStack)) return false;
if (llkSkipName(procp->getCmdline(), llkBlacklistStack)) return false;
if (llkSkipName(android::base::Basename(procp->getCmdline()), llkBlacklistStack)) return false;
if (llkSkipProc(procp, llkBlacklistStack)) return false;
auto kernel_stack = ReadFile(piddir + "/stack");
if (kernel_stack.empty()) {
LOG(VERBOSE) << piddir << "/stack empty comm=" << procp->getComm()
@ -780,12 +812,12 @@ void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
// but if there are problems we assume at least a few
// samples of reads occur before we take any real action.
std::string schedString = ReadFile(piddir + "/sched");
if (schedString.size() == 0) {
if (schedString.empty()) {
// /schedstat is not as standardized, but in 3.1+
// Android devices, the third field is nr_switches
// from /sched:
schedString = ReadFile(piddir + "/schedstat");
if (schedString.size() == 0) {
if (schedString.empty()) {
return;
}
auto val = static_cast<unsigned long long>(-1);
@ -943,7 +975,7 @@ milliseconds llkCheck(bool checkRunning) {
// Get the process stat
std::string stat = ReadFile(piddir + "/stat");
if (stat.size() == 0) {
if (stat.empty()) {
continue;
}
unsigned tid = -1;
@ -1032,11 +1064,10 @@ milliseconds llkCheck(bool checkRunning) {
if (pprocp == nullptr) {
pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
}
if ((pprocp != nullptr) &&
(llkSkipName(pprocp->getComm(), llkBlacklistParent) ||
llkSkipName(pprocp->getCmdline(), llkBlacklistParent) ||
llkSkipName(android::base::Basename(pprocp->getCmdline()), llkBlacklistParent))) {
break;
if (pprocp) {
if (llkSkipProc(pprocp, llkBlacklistParent)) break;
} else {
if (llkSkipName(std::to_string(ppid), llkBlacklistParent)) break;
}
if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
@ -1135,21 +1166,15 @@ milliseconds llkCheck(bool checkRunning) {
if (!p->second.updated) {
IF_ALOG(LOG_VERBOSE, LOG_TAG) {
std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
if (ppidCmdline.size()) {
ppidCmdline = "(" + ppidCmdline + ")";
}
if (!ppidCmdline.empty()) ppidCmdline = "(" + ppidCmdline + ")";
std::string pidCmdline;
if (p->second.pid != p->second.tid) {
pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
if (pidCmdline.size()) {
pidCmdline = "(" + pidCmdline + ")";
}
if (!pidCmdline.empty()) pidCmdline = "(" + pidCmdline + ")";
}
std::string tidCmdline =
llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
if (tidCmdline.size()) {
tidCmdline = "(" + tidCmdline + ")";
}
if (!tidCmdline.empty()) tidCmdline = "(" + tidCmdline + ")";
LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
<< pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
}
@ -1226,13 +1251,11 @@ bool llkInit(const char* threadname) {
llkValidate(); // validate all (effectively minus llkTimeoutMs)
#ifdef __PTRACE_ENABLED__
if (debuggable) {
llkCheckStackSymbols = llkSplit(
android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT));
llkCheckStackSymbols = llkSplit(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT);
}
std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
llkBlacklistStack = llkSplit(
android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack));
llkBlacklistStack = llkSplit(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack);
#endif
std::string defaultBlacklistProcess(
std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
@ -1244,17 +1267,14 @@ bool llkInit(const char* threadname) {
for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
}
defaultBlacklistProcess =
android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
llkBlacklistProcess = llkSplit(defaultBlacklistProcess);
llkBlacklistProcess = llkSplit(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
llkBlacklistProcess.emplace("[khungtaskd]");
}
llkBlacklistParent = llkSplit(android::base::GetProperty(
LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
"," LLK_BLACKLIST_PARENT_DEFAULT));
llkBlacklistUid =
llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT));
llkBlacklistParent = llkSplit(LLK_BLACKLIST_PARENT_PROPERTY,
std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
"," LLK_BLACKLIST_PARENT_DEFAULT);
llkBlacklistUid = llkSplit(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT);
// internal watchdog
::signal(SIGALRM, llkAlarmHandler);

View File

@ -87,7 +87,8 @@ seconds llkdSleepPeriod(char state) {
execute("stop llkd-1");
rest();
std::string setprop("setprop ");
execute((setprop + LLK_CHECK_STACK_PROPERTY + " SyS_openat").c_str());
// Manually check that SyS_openat is _added_ to the list when restarted
execute((setprop + LLK_CHECK_STACK_PROPERTY + " ,SyS_openat").c_str());
rest();
execute((setprop + LLK_ENABLE_WRITEABLE_PROPERTY + " false").c_str());
rest();