fdtrack: emit the most common stack in the abort message.

Bug: http://b/181875562
Test: manual
Change-Id: I2826b148d55788e932fa04dcef33c78fad8be227
This commit is contained in:
Josh Gao 2021-03-11 21:11:37 -08:00
parent 051b454ff1
commit 1cb3681335
2 changed files with 121 additions and 6 deletions

View File

@ -31,9 +31,12 @@
#include <array>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>
#include <android/fdsan.h>
#include <android/set_abort_message.h>
#include <bionic/fdtrack.h>
#include <android-base/no_destructor.h>
@ -48,6 +51,7 @@ struct FdEntry {
};
extern "C" void fdtrack_dump();
extern "C" void fdtrack_dump_fatal();
using fdtrack_callback_t = bool (*)(int fd, const char* const* function_names,
const uint64_t* function_offsets, size_t count, void* arg);
@ -74,7 +78,17 @@ __attribute__((constructor)) static void ctor() {
entry.backtrace.reserve(kStackDepth);
}
signal(BIONIC_SIGNAL_FDTRACK, [](int) { fdtrack_dump(); });
struct sigaction sa = {};
sa.sa_sigaction = [](int, siginfo_t* siginfo, void*) {
if (siginfo->si_code == SI_QUEUE && siginfo->si_int == 1) {
fdtrack_dump_fatal();
} else {
fdtrack_dump();
}
};
sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
sigaction(BIONIC_SIGNAL_FDTRACK, &sa, nullptr);
if (Unwinder().Init()) {
android_fdtrack_hook_t expected = nullptr;
installed = android_fdtrack_compare_exchange_hook(&expected, &fd_hook);
@ -156,16 +170,47 @@ void fdtrack_iterate(fdtrack_callback_t callback, void* arg) {
android_fdtrack_set_enabled(prev);
}
void fdtrack_dump() {
// Computes an order-sensitive fingerprint of a stack trace so that identical stacks can be
// grouped by comparing a single integer.
//
//   function_names:   `stack_depth` NUL-terminated frame names. A null entry is hashed like
//                     an empty name instead of being dereferenced.
//   function_offsets: `stack_depth` offsets, one per frame.
//   stack_depth:      number of frames in both arrays; 0 returns the FNV offset basis.
//
// This is a hand-rolled 64-bit FNV-1a rather than std::__hash_combine: the latter is a
// reserved, libc++-internal name that is not guaranteed to exist or keep its signature.
// The function only does integer arithmetic over the inputs: no allocation, no library calls.
// The value is only meaningful for equality comparison within one process.
static size_t hash_stack(const char* const* function_names, const uint64_t* function_offsets,
                         size_t stack_depth) {
  static constexpr uint64_t kFnvOffsetBasis = 14695981039346656037ULL;
  static constexpr uint64_t kFnvPrime = 1099511628211ULL;

  uint64_t hash = kFnvOffsetBasis;
  for (size_t i = 0; i < stack_depth; ++i) {
    if (const char* name = function_names[i]; name != nullptr) {
      for (; *name != '\0'; ++name) {
        hash = (hash ^ static_cast<unsigned char>(*name)) * kFnvPrime;
      }
    }
    // Fold in a terminator so that frame boundaries matter: {"ab", "c"} != {"a", "bc"}.
    hash = (hash ^ 0u) * kFnvPrime;

    // Fold in the offset one byte at a time, least significant byte first.
    const uint64_t offset = function_offsets[i];
    for (unsigned shift = 0; shift < 64; shift += 8) {
      hash = (hash ^ static_cast<unsigned char>(offset >> shift)) * kFnvPrime;
    }
  }
  // Truncation on 32-bit targets is fine: callers only compare hashes for equality.
  return static_cast<size_t>(hash);
}
static void fdtrack_dump_impl(bool fatal) {
if (!installed) {
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fdtrack not installed");
} else {
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fdtrack dumping...");
}
// If we're aborting, identify the most common stack in the hopes that it's the culprit,
// and emit that in the abort message so crash reporting can separate different fd leaks out.
// This is horrible and quadratic, but we need to avoid allocation since this can happen in
// response to a signal generated asynchronously. We're only going to dump 1k fds by default,
// and we're about to blow up the entire system, so this isn't too expensive.
struct StackInfo {
size_t hash = 0;
size_t count = 0;
size_t stack_depth = 0;
const char* function_names[kStackDepth - kStackFrameSkip];
uint64_t function_offsets[kStackDepth - kStackFrameSkip];
};
struct StackList {
size_t count = 0;
std::array<StackInfo, 128> data;
};
static StackList stacks;
fdtrack_iterate(
[](int fd, const char* const* function_names, const uint64_t* function_offsets, size_t count,
void*) {
[](int fd, const char* const* function_names, const uint64_t* function_offsets,
size_t stack_depth, void* stacks_ptr) {
auto stacks = static_cast<StackList*>(stacks_ptr);
uint64_t fdsan_owner = android_fdsan_get_owner_tag(fd);
if (fdsan_owner != 0) {
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fd %d: (owner = 0x%" PRIx64 ")", fd,
@ -174,12 +219,81 @@ void fdtrack_dump() {
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fd %d: (unowned)", fd);
}
for (size_t i = 0; i < count; ++i) {
for (size_t i = 0; i < stack_depth; ++i) {
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", " %zu: %s+%" PRIu64, i,
function_names[i], function_offsets[i]);
}
if (stacks) {
size_t hash = hash_stack(function_names, function_offsets, stack_depth);
bool found_stack = false;
for (size_t i = 0; i < stacks->count; ++i) {
if (stacks->data[i].hash == hash) {
++stacks->data[i].count;
found_stack = true;
break;
}
}
if (!found_stack) {
if (stacks->count < stacks->data.size()) {
auto& stack = stacks->data[stacks->count++];
stack.hash = hash;
stack.count = 1;
stack.stack_depth = stack_depth;
for (size_t i = 0; i < stack_depth; ++i) {
stack.function_names[i] = function_names[i];
stack.function_offsets[i] = function_offsets[i];
}
}
}
}
return true;
},
nullptr);
fatal ? &stacks : nullptr);
if (fatal) {
// Find the most common stack.
size_t max = 0;
StackInfo* stack = nullptr;
for (size_t i = 0; i < stacks.count; ++i) {
if (stacks.data[i].count > max) {
stack = &stacks.data[i];
max = stack->count;
}
}
static char buf[1024];
if (!stack) {
async_safe_format_buffer(buf, sizeof(buf),
"aborting due to fd leak: failed to find most common stack");
} else {
char* p = buf;
p += async_safe_format_buffer(buf, sizeof(buf),
"aborting due to fd leak: most common stack =\n");
for (size_t i = 0; i < stack->stack_depth; ++i) {
ssize_t bytes_left = buf + sizeof(buf) - p;
if (bytes_left > 0) {
p += async_safe_format_buffer(p, buf + sizeof(buf) - p, " %zu: %s+%" PRIu64 "\n", i,
stack->function_names[i], stack->function_offsets[i]);
}
}
}
android_set_abort_message(buf);
// Abort on a different thread to avoid ART dumping runtime stacks.
std::thread([]() { abort(); }).join();
}
}
// Non-fatal dump entry point: forwards to fdtrack_dump_impl with the fatal flag cleared.
void fdtrack_dump() {
  constexpr bool kFatal = false;
  fdtrack_dump_impl(kFatal);
}
// Fatal dump entry point: forwards to fdtrack_dump_impl with the fatal flag set.
void fdtrack_dump_fatal() {
  constexpr bool kFatal = true;
  fdtrack_dump_impl(kFatal);
}

View File

@ -1,6 +1,7 @@
LIBFDTRACK {
global:
fdtrack_dump;
fdtrack_dump_fatal;
fdtrack_iterate;
local:
*;