[MTE] Implement permissive (recoverable) MTE for apps
Extends the recoverable native crash handling support to also allow for MTE crashes to be recovered from in apps. Backs onto the existing GWP-ASan recoverable handling. At least for system/core, I've renamed the (now) generic "recoverable crash" variables to remove the notion of GWP-ASan. Permissive MTE should no longer crash an app, and crashes are still visible in the AppExitInfo API and tombstones. Test: atest CtsTaggingHostTestCases Bug: 328793166 Change-Id: I4c6ffa85af0e0d9b72d0ccd606bb6e1ca464cfff
This commit is contained in:
parent
851c857420
commit
31407917f8
|
@ -143,7 +143,7 @@ static bool ptrace_interrupt(pid_t tid, int* received_signal) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool activity_manager_notify(pid_t pid, int signal, const std::string& amfd_data,
|
static bool activity_manager_notify(pid_t pid, int signal, const std::string& amfd_data,
|
||||||
bool recoverable_gwp_asan_crash) {
|
bool recoverable_crash) {
|
||||||
ATRACE_CALL();
|
ATRACE_CALL();
|
||||||
android::base::unique_fd amfd(socket_local_client(
|
android::base::unique_fd amfd(socket_local_client(
|
||||||
"/data/system/ndebugsocket", ANDROID_SOCKET_NAMESPACE_FILESYSTEM, SOCK_STREAM));
|
"/data/system/ndebugsocket", ANDROID_SOCKET_NAMESPACE_FILESYSTEM, SOCK_STREAM));
|
||||||
|
@ -169,7 +169,7 @@ static bool activity_manager_notify(pid_t pid, int signal, const std::string& am
|
||||||
// Activity Manager protocol:
|
// Activity Manager protocol:
|
||||||
// - 32-bit network-byte-order: pid
|
// - 32-bit network-byte-order: pid
|
||||||
// - 32-bit network-byte-order: signal number
|
// - 32-bit network-byte-order: signal number
|
||||||
// - byte: recoverable_gwp_asan_crash
|
// - byte: recoverable_crash
|
||||||
// - bytes: raw text of the dump
|
// - bytes: raw text of the dump
|
||||||
// - null terminator
|
// - null terminator
|
||||||
|
|
||||||
|
@ -185,10 +185,9 @@ static bool activity_manager_notify(pid_t pid, int signal, const std::string& am
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t recoverable_gwp_asan_crash_byte = recoverable_gwp_asan_crash ? 1 : 0;
|
uint8_t recoverable_crash_byte = recoverable_crash ? 1 : 0;
|
||||||
if (!android::base::WriteFully(amfd, &recoverable_gwp_asan_crash_byte,
|
if (!android::base::WriteFully(amfd, &recoverable_crash_byte, sizeof(recoverable_crash_byte))) {
|
||||||
sizeof(recoverable_gwp_asan_crash_byte))) {
|
PLOG(ERROR) << "AM recoverable_crash_byte write failed";
|
||||||
PLOG(ERROR) << "AM recoverable_gwp_asan_crash_byte write failed";
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -280,11 +279,11 @@ static void ParseArgs(int argc, char** argv, pid_t* pseudothread_tid, DebuggerdD
|
||||||
|
|
||||||
static void ReadCrashInfo(unique_fd& fd, siginfo_t* siginfo,
|
static void ReadCrashInfo(unique_fd& fd, siginfo_t* siginfo,
|
||||||
std::unique_ptr<unwindstack::Regs>* regs, ProcessInfo* process_info,
|
std::unique_ptr<unwindstack::Regs>* regs, ProcessInfo* process_info,
|
||||||
bool* recoverable_gwp_asan_crash) {
|
bool* recoverable_crash) {
|
||||||
std::aligned_storage<sizeof(CrashInfo) + 1, alignof(CrashInfo)>::type buf;
|
std::aligned_storage<sizeof(CrashInfo) + 1, alignof(CrashInfo)>::type buf;
|
||||||
CrashInfo* crash_info = reinterpret_cast<CrashInfo*>(&buf);
|
CrashInfo* crash_info = reinterpret_cast<CrashInfo*>(&buf);
|
||||||
ssize_t rc = TEMP_FAILURE_RETRY(read(fd.get(), &buf, sizeof(buf)));
|
ssize_t rc = TEMP_FAILURE_RETRY(read(fd.get(), &buf, sizeof(buf)));
|
||||||
*recoverable_gwp_asan_crash = false;
|
*recoverable_crash = false;
|
||||||
if (rc == -1) {
|
if (rc == -1) {
|
||||||
PLOG(FATAL) << "failed to read target ucontext";
|
PLOG(FATAL) << "failed to read target ucontext";
|
||||||
} else {
|
} else {
|
||||||
|
@ -321,7 +320,7 @@ static void ReadCrashInfo(unique_fd& fd, siginfo_t* siginfo,
|
||||||
process_info->scudo_region_info = crash_info->data.d.scudo_region_info;
|
process_info->scudo_region_info = crash_info->data.d.scudo_region_info;
|
||||||
process_info->scudo_ring_buffer = crash_info->data.d.scudo_ring_buffer;
|
process_info->scudo_ring_buffer = crash_info->data.d.scudo_ring_buffer;
|
||||||
process_info->scudo_ring_buffer_size = crash_info->data.d.scudo_ring_buffer_size;
|
process_info->scudo_ring_buffer_size = crash_info->data.d.scudo_ring_buffer_size;
|
||||||
*recoverable_gwp_asan_crash = crash_info->data.d.recoverable_gwp_asan_crash;
|
*recoverable_crash = crash_info->data.d.recoverable_crash;
|
||||||
process_info->crash_detail_page = crash_info->data.d.crash_detail_page;
|
process_info->crash_detail_page = crash_info->data.d.crash_detail_page;
|
||||||
FALLTHROUGH_INTENDED;
|
FALLTHROUGH_INTENDED;
|
||||||
case 1:
|
case 1:
|
||||||
|
@ -487,7 +486,7 @@ int main(int argc, char** argv) {
|
||||||
std::map<pid_t, ThreadInfo> thread_info;
|
std::map<pid_t, ThreadInfo> thread_info;
|
||||||
siginfo_t siginfo;
|
siginfo_t siginfo;
|
||||||
std::string error;
|
std::string error;
|
||||||
bool recoverable_gwp_asan_crash = false;
|
bool recoverable_crash = false;
|
||||||
|
|
||||||
{
|
{
|
||||||
ATRACE_NAME("ptrace");
|
ATRACE_NAME("ptrace");
|
||||||
|
@ -539,8 +538,7 @@ int main(int argc, char** argv) {
|
||||||
|
|
||||||
if (thread == g_target_thread) {
|
if (thread == g_target_thread) {
|
||||||
// Read the thread's registers along with the rest of the crash info out of the pipe.
|
// Read the thread's registers along with the rest of the crash info out of the pipe.
|
||||||
ReadCrashInfo(input_pipe, &siginfo, &info.registers, &process_info,
|
ReadCrashInfo(input_pipe, &siginfo, &info.registers, &process_info, &recoverable_crash);
|
||||||
&recoverable_gwp_asan_crash);
|
|
||||||
info.siginfo = &siginfo;
|
info.siginfo = &siginfo;
|
||||||
info.signo = info.siginfo->si_signo;
|
info.signo = info.siginfo->si_signo;
|
||||||
|
|
||||||
|
@ -670,7 +668,7 @@ int main(int argc, char** argv) {
|
||||||
if (fatal_signal) {
|
if (fatal_signal) {
|
||||||
// Don't try to notify ActivityManager if it just crashed, or we might hang until timeout.
|
// Don't try to notify ActivityManager if it just crashed, or we might hang until timeout.
|
||||||
if (thread_info[target_process].thread_name != "system_server") {
|
if (thread_info[target_process].thread_name != "system_server") {
|
||||||
activity_manager_notify(target_process, signo, amfd_data, recoverable_gwp_asan_crash);
|
activity_manager_notify(target_process, signo, amfd_data, recoverable_crash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -396,7 +396,7 @@ static int debuggerd_dispatch_pseudothread(void* arg) {
|
||||||
ASSERT_SAME_OFFSET(scudo_ring_buffer, scudo_ring_buffer);
|
ASSERT_SAME_OFFSET(scudo_ring_buffer, scudo_ring_buffer);
|
||||||
ASSERT_SAME_OFFSET(scudo_ring_buffer_size, scudo_ring_buffer_size);
|
ASSERT_SAME_OFFSET(scudo_ring_buffer_size, scudo_ring_buffer_size);
|
||||||
ASSERT_SAME_OFFSET(scudo_stack_depot_size, scudo_stack_depot_size);
|
ASSERT_SAME_OFFSET(scudo_stack_depot_size, scudo_stack_depot_size);
|
||||||
ASSERT_SAME_OFFSET(recoverable_gwp_asan_crash, recoverable_gwp_asan_crash);
|
ASSERT_SAME_OFFSET(recoverable_crash, recoverable_crash);
|
||||||
ASSERT_SAME_OFFSET(crash_detail_page, crash_detail_page);
|
ASSERT_SAME_OFFSET(crash_detail_page, crash_detail_page);
|
||||||
#undef ASSERT_SAME_OFFSET
|
#undef ASSERT_SAME_OFFSET
|
||||||
|
|
||||||
|
@ -573,6 +573,7 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
|
||||||
}
|
}
|
||||||
|
|
||||||
gwp_asan_callbacks_t gwp_asan_callbacks = {};
|
gwp_asan_callbacks_t gwp_asan_callbacks = {};
|
||||||
|
bool recoverable_gwp_asan_crash = false;
|
||||||
if (g_callbacks.get_gwp_asan_callbacks != nullptr) {
|
if (g_callbacks.get_gwp_asan_callbacks != nullptr) {
|
||||||
// GWP-ASan catches use-after-free and heap-buffer-overflow by using PROT_NONE
|
// GWP-ASan catches use-after-free and heap-buffer-overflow by using PROT_NONE
|
||||||
// guard pages, which lead to SEGV. Normally, debuggerd prints a bug report
|
// guard pages, which lead to SEGV. Normally, debuggerd prints a bug report
|
||||||
|
@ -587,10 +588,30 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
|
||||||
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report &&
|
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report &&
|
||||||
gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
|
gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
|
||||||
gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
|
gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
|
||||||
process_info.recoverable_gwp_asan_crash = true;
|
recoverable_gwp_asan_crash = true;
|
||||||
|
process_info.recoverable_crash = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (info->si_signo == SIGSEGV &&
|
||||||
|
(info->si_code == SEGV_MTESERR || info->si_code == SEGV_MTEAERR) && is_permissive_mte()) {
|
||||||
|
process_info.recoverable_crash = true;
|
||||||
|
// If we are in permissive MTE mode, we do not crash, but instead disable MTE on this thread,
|
||||||
|
// and then let the failing instruction be retried. The second time should work (except
|
||||||
|
// if there is another non-MTE fault).
|
||||||
|
int tagged_addr_ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
|
||||||
|
if (tagged_addr_ctrl < 0) {
|
||||||
|
fatal_errno("failed to PR_GET_TAGGED_ADDR_CTRL");
|
||||||
|
}
|
||||||
|
tagged_addr_ctrl = (tagged_addr_ctrl & ~PR_MTE_TCF_MASK) | PR_MTE_TCF_NONE;
|
||||||
|
if (prctl(PR_SET_TAGGED_ADDR_CTRL, tagged_addr_ctrl, 0, 0, 0) < 0) {
|
||||||
|
fatal_errno("failed to PR_SET_TAGGED_ADDR_CTRL");
|
||||||
|
}
|
||||||
|
async_safe_format_log(ANDROID_LOG_ERROR, "libc",
|
||||||
|
"MTE ERROR DETECTED BUT RUNNING IN PERMISSIVE MODE. CONTINUING.");
|
||||||
|
pthread_mutex_unlock(&crash_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
// If sival_int is ~0, it means that the fallback handler has been called
|
// If sival_int is ~0, it means that the fallback handler has been called
|
||||||
// once before and this function is being called again to dump the stack
|
// once before and this function is being called again to dump the stack
|
||||||
// of a specific thread. It is possible that the prctl call might return 1,
|
// of a specific thread. It is possible that the prctl call might return 1,
|
||||||
|
@ -602,7 +623,7 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
|
||||||
// you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing
|
// you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing
|
||||||
// ANR trace.
|
// ANR trace.
|
||||||
debuggerd_fallback_handler(info, ucontext, process_info.abort_msg);
|
debuggerd_fallback_handler(info, ucontext, process_info.abort_msg);
|
||||||
if (no_new_privs && process_info.recoverable_gwp_asan_crash) {
|
if (no_new_privs && recoverable_gwp_asan_crash) {
|
||||||
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
|
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -679,29 +700,14 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
|
||||||
// If the signal is fatal, don't unlock the mutex to prevent other crashing threads from
|
// If the signal is fatal, don't unlock the mutex to prevent other crashing threads from
|
||||||
// starting to dump right before our death.
|
// starting to dump right before our death.
|
||||||
pthread_mutex_unlock(&crash_mutex);
|
pthread_mutex_unlock(&crash_mutex);
|
||||||
} else if (process_info.recoverable_gwp_asan_crash) {
|
} else if (process_info.recoverable_crash) {
|
||||||
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
|
if (recoverable_gwp_asan_crash) {
|
||||||
|
gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
|
||||||
|
}
|
||||||
pthread_mutex_unlock(&crash_mutex);
|
pthread_mutex_unlock(&crash_mutex);
|
||||||
}
|
}
|
||||||
#ifdef __aarch64__
|
#ifdef __aarch64__
|
||||||
else if (info->si_signo == SIGSEGV &&
|
else if (info->si_signo == SIGSEGV && info->si_code == SEGV_MTEAERR && getppid() == 1) {
|
||||||
(info->si_code == SEGV_MTESERR || info->si_code == SEGV_MTEAERR) &&
|
|
||||||
is_permissive_mte()) {
|
|
||||||
// If we are in permissive MTE mode, we do not crash, but instead disable MTE on this thread,
|
|
||||||
// and then let the failing instruction be retried. The second time should work (except
|
|
||||||
// if there is another non-MTE fault).
|
|
||||||
int tagged_addr_ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
|
|
||||||
if (tagged_addr_ctrl < 0) {
|
|
||||||
fatal_errno("failed to PR_GET_TAGGED_ADDR_CTRL");
|
|
||||||
}
|
|
||||||
tagged_addr_ctrl = (tagged_addr_ctrl & ~PR_MTE_TCF_MASK) | PR_MTE_TCF_NONE;
|
|
||||||
if (prctl(PR_SET_TAGGED_ADDR_CTRL, tagged_addr_ctrl, 0, 0, 0) < 0) {
|
|
||||||
fatal_errno("failed to PR_SET_TAGGED_ADDR_CTRL");
|
|
||||||
}
|
|
||||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc",
|
|
||||||
"MTE ERROR DETECTED BUT RUNNING IN PERMISSIVE MODE. CONTINUING.");
|
|
||||||
pthread_mutex_unlock(&crash_mutex);
|
|
||||||
} else if (info->si_signo == SIGSEGV && info->si_code == SEGV_MTEAERR && getppid() == 1) {
|
|
||||||
// Back channel to init (see system/core/init/service.cpp) to signal that
|
// Back channel to init (see system/core/init/service.cpp) to signal that
|
||||||
// this process crashed due to an ASYNC MTE fault and should be considered
|
// this process crashed due to an ASYNC MTE fault and should be considered
|
||||||
// for upgrade to SYNC mode. We are re-using the ART profiler signal, which
|
// for upgrade to SYNC mode. We are re-using the ART profiler signal, which
|
||||||
|
@ -761,18 +767,7 @@ void debuggerd_init(debuggerd_callbacks_t* callbacks) {
|
||||||
debuggerd_register_handlers(&action);
|
debuggerd_register_handlers(&action);
|
||||||
}
|
}
|
||||||
|
|
||||||
// When debuggerd's signal handler is the first handler called, it's great at
|
bool debuggerd_handle_gwp_asan_signal(int signal_number, siginfo_t* info, void* context) {
|
||||||
// handling the recoverable GWP-ASan mode. For apps, sigchain (from libart) is
|
|
||||||
// always the first signal handler, and so the following function is what
|
|
||||||
// sigchain must call before processing the signal. This allows for processing
|
|
||||||
// of a potentially recoverable GWP-ASan crash. If the signal requires GWP-ASan
|
|
||||||
// recovery, then dump a report (via the regular debuggerd hanndler), and patch
|
|
||||||
// up the allocator, and allow the process to continue (indicated by returning
|
|
||||||
// 'true'). If the crash has nothing to do with GWP-ASan, or recovery isn't
|
|
||||||
// possible, return 'false'.
|
|
||||||
bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context) {
|
|
||||||
if (signal_number != SIGSEGV || !signal_has_si_addr(info)) return false;
|
|
||||||
|
|
||||||
if (g_callbacks.get_gwp_asan_callbacks == nullptr) return false;
|
if (g_callbacks.get_gwp_asan_callbacks == nullptr) return false;
|
||||||
gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
|
gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
|
||||||
if (gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery == nullptr ||
|
if (gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery == nullptr ||
|
||||||
|
@ -810,3 +805,33 @@ bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context)
|
||||||
pthread_mutex_unlock(&first_crash_mutex);
|
pthread_mutex_unlock(&first_crash_mutex);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// When debuggerd's signal handler is the first handler called, it's great at
|
||||||
|
// handling the recoverable GWP-ASan and permissive MTE modes. For apps,
|
||||||
|
// sigchain (from libart) is always the first signal handler, and so the
|
||||||
|
// following function is what sigchain must call before processing the signal.
|
||||||
|
// This allows for processing of a potentially recoverable GWP-ASan or MTE
|
||||||
|
// crash. If the signal requires recovery, then dump a report (via the regular
|
||||||
|
// debuggerd hanndler), and patch up the allocator (in the case of GWP-ASan) or
|
||||||
|
// disable MTE on the thread, and allow the process to continue (indicated by
|
||||||
|
// returning 'true'). If the crash has nothing to do with GWP-ASan/MTE, or
|
||||||
|
// recovery isn't possible, return 'false'.
|
||||||
|
bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context) {
|
||||||
|
if (signal_number != SIGSEGV) return false;
|
||||||
|
if (info->si_code == SEGV_MTEAERR || info->si_code == SEGV_MTESERR) {
|
||||||
|
if (!is_permissive_mte()) return false;
|
||||||
|
// Because permissive MTE disables MTE for the entire thread, we're less
|
||||||
|
// worried about getting a whole bunch of crashes in a row. ActivityManager
|
||||||
|
// doesn't like multiple native crashes for an app in a short period of time
|
||||||
|
// (see the comment about recoverable GWP-ASan in
|
||||||
|
// `debuggerd_handle_gwp_asan_signal`), but that shouldn't happen if MTE is
|
||||||
|
// disabled for the entire thread. This might need to be changed if there's
|
||||||
|
// some low-hanging bug that happens across multiple threads in quick
|
||||||
|
// succession.
|
||||||
|
debuggerd_signal_handler(signal_number, info, context);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!signal_has_si_addr(info)) return false;
|
||||||
|
return debuggerd_handle_gwp_asan_signal(signal_number, info, context);
|
||||||
|
}
|
||||||
|
|
|
@ -47,7 +47,7 @@ struct __attribute__((packed)) debugger_process_info {
|
||||||
const char* scudo_ring_buffer;
|
const char* scudo_ring_buffer;
|
||||||
size_t scudo_ring_buffer_size;
|
size_t scudo_ring_buffer_size;
|
||||||
size_t scudo_stack_depot_size;
|
size_t scudo_stack_depot_size;
|
||||||
bool recoverable_gwp_asan_crash;
|
bool recoverable_crash;
|
||||||
struct crash_detail_page_t* crash_detail_page;
|
struct crash_detail_page_t* crash_detail_page;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -100,7 +100,7 @@ struct __attribute__((__packed__)) CrashInfoDataDynamic : public CrashInfoDataSt
|
||||||
uintptr_t scudo_ring_buffer;
|
uintptr_t scudo_ring_buffer;
|
||||||
size_t scudo_ring_buffer_size;
|
size_t scudo_ring_buffer_size;
|
||||||
size_t scudo_stack_depot_size;
|
size_t scudo_stack_depot_size;
|
||||||
bool recoverable_gwp_asan_crash;
|
bool recoverable_crash;
|
||||||
uintptr_t crash_detail_page;
|
uintptr_t crash_detail_page;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue