// Patch overview (unified diff touching libfdtrack/fdtrack.cpp and libfdtrack/libfdtrack.map.txt).
//
// What the patch does:
//  * Declares a new extern "C" entry point, fdtrack_dump_fatal(), and exports it from the
//    LIBFDTRACK version script next to fdtrack_dump and fdtrack_iterate.
//  * In ctor(), replaces the signal() registration for BIONIC_SIGNAL_FDTRACK with sigaction()
//    using SA_SIGINFO | SA_ONSTACK. The handler calls fdtrack_dump_fatal() when
//    si_code == SI_QUEUE and si_int == 1, and fdtrack_dump() otherwise.
//  * Adds hash_stack(), which folds each frame's function name and offset into one size_t
//    via std::__hash_combine.
//  * Moves the former fdtrack_dump() body into static fdtrack_dump_impl(bool fatal).
//    fdtrack_dump() calls it with false, fdtrack_dump_fatal() with true.
//  * In fatal mode the fdtrack_iterate callback also tallies each fd's stack, keyed by its
//    hash, in a function-local static table. Afterwards the entry with the highest count is
//    formatted into a static 1024-byte buffer, handed to android_set_abort_message(), and
//    abort() is called from a freshly created std::thread that is joined.
//    When fatal is false, nullptr is passed as the callback argument and no tallying happens.
//
// NOTE(review): in this copy the text between angle brackets appears to have been stripped
//   (bare "#include", "std::hash()", "std::array data", "static_cast(stacks_ptr)") and the
//   diff's line breaks are collapsed; recover the exact tokens from the original change.
// NOTE(review): stacks are matched on hash equality alone, so two different stacks with the
//   same hash would be counted as one -- confirm this is acceptable for crash bucketing.
// NOTE(review): once the table is full, previously unseen stacks are dropped without a log line.
// NOTE(review): the copy into StackInfo::function_names/function_offsets is bounded by
//   stack_depth, not by the array length (kStackDepth - kStackFrameSkip); presumably
//   fdtrack_iterate never reports more frames than that -- verify against its implementation.
// NOTE(review): `p` is advanced by the return value of async_safe_format_buffer(); the
//   bytes_left > 0 guard suggests that value can exceed the space remaining, in which case `p`
//   ends up past the end of buf -- confirm the return-value contract.
// NOTE(review): the result of sigaction() is not checked.
diff --git a/libfdtrack/fdtrack.cpp b/libfdtrack/fdtrack.cpp index 898bc43db..fd562741f 100644 --- a/libfdtrack/fdtrack.cpp +++ b/libfdtrack/fdtrack.cpp @@ -31,9 +31,12 @@ #include #include +#include +#include #include #include +#include #include #include @@ -48,6 +51,7 @@ struct FdEntry { }; extern "C" void fdtrack_dump(); +extern "C" void fdtrack_dump_fatal(); using fdtrack_callback_t = bool (*)(int fd, const char* const* function_names, const uint64_t* function_offsets, size_t count, void* arg); @@ -74,7 +78,17 @@ __attribute__((constructor)) static void ctor() { entry.backtrace.reserve(kStackDepth); } - signal(BIONIC_SIGNAL_FDTRACK, [](int) { fdtrack_dump(); }); + struct sigaction sa = {}; + sa.sa_sigaction = [](int, siginfo_t* siginfo, void*) { + if (siginfo->si_code == SI_QUEUE && siginfo->si_int == 1) { + fdtrack_dump_fatal(); + } else { + fdtrack_dump(); + } + }; + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + sigaction(BIONIC_SIGNAL_FDTRACK, &sa, nullptr); + if (Unwinder().Init()) { android_fdtrack_hook_t expected = nullptr; installed = android_fdtrack_compare_exchange_hook(&expected, &fd_hook); @@ -156,16 +170,47 @@ void fdtrack_iterate(fdtrack_callback_t callback, void* arg) { android_fdtrack_set_enabled(prev); } -void fdtrack_dump() { +static size_t hash_stack(const char* const* function_names, const uint64_t* function_offsets, + size_t stack_depth) { + size_t hash = 0; + for (size_t i = 0; i < stack_depth; ++i) { + // To future maintainers: if a libc++ update ever makes this invalid, replace this with +. 
+ hash = std::__hash_combine(hash, std::hash()(function_names[i])); + hash = std::__hash_combine(hash, std::hash()(function_offsets[i])); + } + return hash; +} + +static void fdtrack_dump_impl(bool fatal) { if (!installed) { async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fdtrack not installed"); } else { async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fdtrack dumping..."); } + // If we're aborting, identify the most common stack in the hopes that it's the culprit, + // and emit that in the abort message so crash reporting can separate different fd leaks out. + // This is horrible and quadratic, but we need to avoid allocation since this can happen in + // response to a signal generated asynchronously. We're only going to dump 1k fds by default, + // and we're about to blow up the entire system, so this isn't too expensive. + struct StackInfo { + size_t hash = 0; + size_t count = 0; + + size_t stack_depth = 0; + const char* function_names[kStackDepth - kStackFrameSkip]; + uint64_t function_offsets[kStackDepth - kStackFrameSkip]; + }; + struct StackList { + size_t count = 0; + std::array data; + }; + static StackList stacks; + fdtrack_iterate( - [](int fd, const char* const* function_names, const uint64_t* function_offsets, size_t count, - void*) { + [](int fd, const char* const* function_names, const uint64_t* function_offsets, + size_t stack_depth, void* stacks_ptr) { + auto stacks = static_cast(stacks_ptr); uint64_t fdsan_owner = android_fdsan_get_owner_tag(fd); if (fdsan_owner != 0) { async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fd %d: (owner = 0x%" PRIx64 ")", fd, @@ -174,12 +219,81 @@ void fdtrack_dump() { async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fd %d: (unowned)", fd); } - for (size_t i = 0; i < count; ++i) { + for (size_t i = 0; i < stack_depth; ++i) { async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", " %zu: %s+%" PRIu64, i, function_names[i], function_offsets[i]); } + if (stacks) { + size_t hash = hash_stack(function_names, 
function_offsets, stack_depth); + bool found_stack = false; + for (size_t i = 0; i < stacks->count; ++i) { + if (stacks->data[i].hash == hash) { + ++stacks->data[i].count; + found_stack = true; + break; + } + } + + if (!found_stack) { + if (stacks->count < stacks->data.size()) { + auto& stack = stacks->data[stacks->count++]; + stack.hash = hash; + stack.count = 1; + stack.stack_depth = stack_depth; + for (size_t i = 0; i < stack_depth; ++i) { + stack.function_names[i] = function_names[i]; + stack.function_offsets[i] = function_offsets[i]; + } + } + } + } + return true; }, - nullptr); + fatal ? &stacks : nullptr); + + if (fatal) { + // Find the most common stack. + size_t max = 0; + StackInfo* stack = nullptr; + for (size_t i = 0; i < stacks.count; ++i) { + if (stacks.data[i].count > max) { + stack = &stacks.data[i]; + max = stack->count; + } + } + + static char buf[1024]; + + if (!stack) { + async_safe_format_buffer(buf, sizeof(buf), + "aborting due to fd leak: failed to find most common stack"); + } else { + char* p = buf; + p += async_safe_format_buffer(buf, sizeof(buf), + "aborting due to fd leak: most common stack =\n"); + + for (size_t i = 0; i < stack->stack_depth; ++i) { + ssize_t bytes_left = buf + sizeof(buf) - p; + if (bytes_left > 0) { + p += async_safe_format_buffer(p, buf + sizeof(buf) - p, " %zu: %s+%" PRIu64 "\n", i, + stack->function_names[i], stack->function_offsets[i]); + } + } + } + + android_set_abort_message(buf); + + // Abort on a different thread to avoid ART dumping runtime stacks. + std::thread([]() { abort(); }).join(); + } +} + +void fdtrack_dump() { + fdtrack_dump_impl(false); +} + +void fdtrack_dump_fatal() { + fdtrack_dump_impl(true); } diff --git a/libfdtrack/libfdtrack.map.txt b/libfdtrack/libfdtrack.map.txt index 7a23954d8..6c4015c00 100644 --- a/libfdtrack/libfdtrack.map.txt +++ b/libfdtrack/libfdtrack.map.txt @@ -1,6 +1,7 @@ LIBFDTRACK { global: fdtrack_dump; + fdtrack_dump_fatal; fdtrack_iterate; local: *;