Merge "riscv64 SCS support."
This commit is contained in:
commit
060cd2438c
|
@ -46,8 +46,6 @@
|
|||
// 0 sigflag/cookie setjmp cookie in top 31 bits, signal mask flag in low bit
|
||||
// 1 sigmask signal mask (not used with _setjmp / _longjmp)
|
||||
// 2 core_base base of core registers (x18-x30, sp)
|
||||
// (We only store the low bits of x18 to avoid leaking the
|
||||
// shadow call stack address into memory.)
|
||||
// 16 float_base base of float registers (d8-d15)
|
||||
// 24 checksum checksum of core registers
|
||||
// 25 reserved reserved entries (room to grow)
|
||||
|
@ -68,8 +66,6 @@
|
|||
#define _JB_D8_D9 (_JB_D10_D11 + 2)
|
||||
#define _JB_CHECKSUM (_JB_D8_D9 + 2)
|
||||
|
||||
#define SCS_MASK (SCS_SIZE - 1)
|
||||
|
||||
.macro m_mangle_registers reg, sp_reg
|
||||
eor x3, x3, \reg
|
||||
eor x19, x19, \reg
|
||||
|
@ -155,6 +151,9 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(sigsetjmp)
|
|||
bic x1, x1, #1
|
||||
|
||||
// Mask off the high bits of the shadow call stack pointer.
|
||||
// We only store the low bits of x18 to avoid leaking the
|
||||
// shadow call stack address into memory.
|
||||
// See the SCS commentary in pthread_internal.h for more detail.
|
||||
and x3, x18, #SCS_MASK
|
||||
|
||||
// Save core registers.
|
||||
|
|
|
@ -79,7 +79,7 @@
|
|||
.macro m_mangle_registers reg, sp_reg
|
||||
xor s0, s0, \reg
|
||||
xor s1, s1, \reg
|
||||
xor s2, s2, \reg
|
||||
xor a4, a4, \reg // a4 is the masked s2 (x18) for SCS.
|
||||
xor s3, s3, \reg
|
||||
xor s4, s4, \reg
|
||||
xor s5, s5, \reg
|
||||
|
@ -151,13 +151,20 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(sigsetjmp)
|
|||
ld a1, _JB_SIGFLAG(a0)
|
||||
andi a1, a1, -2
|
||||
|
||||
// Mask off the high bits of the shadow call stack pointer.
|
||||
// We only store the low bits of x18 to avoid leaking the
|
||||
// shadow call stack address into memory.
|
||||
// See the SCS commentary in pthread_internal.h for more detail.
|
||||
li a4, SCS_MASK
|
||||
and a4, a4, x18
|
||||
|
||||
// Save core registers.
|
||||
mv a2, sp
|
||||
m_mangle_registers a1, sp_reg=a2
|
||||
sd ra, _JB_RA(a0)
|
||||
sd s0, _JB_S0(a0)
|
||||
sd s1, _JB_S1(a0)
|
||||
sd s2, _JB_S2(a0)
|
||||
sd a4, _JB_S2(a0) // a4 is the masked s2 (x18) for SCS.
|
||||
sd s3, _JB_S3(a0)
|
||||
sd s4, _JB_S4(a0)
|
||||
sd s5, _JB_S5(a0)
|
||||
|
@ -231,7 +238,7 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(siglongjmp)
|
|||
ld ra, _JB_RA(a0)
|
||||
ld s0, _JB_S0(a0)
|
||||
ld s1, _JB_S1(a0)
|
||||
ld s2, _JB_S2(a0)
|
||||
ld a4, _JB_S2(a0) // Don't clobber s2 (x18) used for SCS yet.
|
||||
ld s3, _JB_S3(a0)
|
||||
ld s4, _JB_S4(a0)
|
||||
ld s5, _JB_S5(a0)
|
||||
|
@ -245,6 +252,11 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(siglongjmp)
|
|||
m_unmangle_registers a2, sp_reg=a3
|
||||
mv sp, a3
|
||||
|
||||
// Restore the low bits of the shadow call stack pointer.
|
||||
li a5, ~SCS_MASK
|
||||
and x18, x18, a5
|
||||
or x18, a4, x18
|
||||
|
||||
addi sp, sp, -24
|
||||
sd ra, 0(sp)
|
||||
sd a0, 8(sp)
|
||||
|
|
|
@ -116,14 +116,14 @@ static void __init_alternate_signal_stack(pthread_internal_t* thread) {
|
|||
}
|
||||
|
||||
static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
|
||||
#ifdef __aarch64__
|
||||
#if defined(__aarch64__) || defined(__riscv)
|
||||
// Allocate the stack and the guard region.
|
||||
char* scs_guard_region = reinterpret_cast<char*>(
|
||||
mmap(nullptr, SCS_GUARD_REGION_SIZE, 0, MAP_PRIVATE | MAP_ANON, -1, 0));
|
||||
thread->shadow_call_stack_guard_region = scs_guard_region;
|
||||
|
||||
// The address is aligned to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
|
||||
// in jmp_buf.
|
||||
// in jmp_buf. See the SCS commentary in pthread_internal.h for more detail.
|
||||
char* scs_aligned_guard_region =
|
||||
reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
|
||||
|
||||
|
@ -133,11 +133,15 @@ static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
|
|||
size_t scs_offset =
|
||||
(getpid() == 1) ? 0 : (arc4random_uniform(SCS_GUARD_REGION_SIZE / SCS_SIZE - 1) * SCS_SIZE);
|
||||
|
||||
// Make the stack readable and writable and store its address in register x18. This is
|
||||
// deliberately the only place where the address is stored.
|
||||
char *scs = scs_aligned_guard_region + scs_offset;
|
||||
// Make the stack readable and writable and store its address in x18.
|
||||
// This is deliberately the only place where the address is stored.
|
||||
char* scs = scs_aligned_guard_region + scs_offset;
|
||||
mprotect(scs, SCS_SIZE, PROT_READ | PROT_WRITE);
|
||||
#if defined(__aarch64__)
|
||||
__asm__ __volatile__("mov x18, %0" ::"r"(scs));
|
||||
#elif defined(__riscv)
|
||||
__asm__ __volatile__("mv x18, %0" ::"r"(scs));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -117,7 +117,7 @@ void pthread_exit(void* return_value) {
|
|||
__rt_sigprocmask(SIG_BLOCK, &set, nullptr, sizeof(sigset64_t));
|
||||
}
|
||||
|
||||
#ifdef __aarch64__
|
||||
#if defined(__aarch64__) || defined(__riscv)
|
||||
// Free the shadow call stack and guard pages.
|
||||
munmap(thread->shadow_call_stack_guard_region, SCS_GUARD_REGION_SIZE);
|
||||
#endif
|
||||
|
|
|
@ -105,9 +105,13 @@ class pthread_internal_t {
|
|||
|
||||
void* alternate_signal_stack;
|
||||
|
||||
// The start address of the shadow call stack's guard region (arm64 only).
|
||||
// The start address of the shadow call stack's guard region (arm64/riscv64).
|
||||
// This region is SCS_GUARD_REGION_SIZE bytes large, but only SCS_SIZE bytes
|
||||
// are actually used.
|
||||
//
|
||||
// This address is only used to deallocate the shadow call stack on thread
|
||||
// exit; the address of the stack itself is stored only in the x18 register.
|
||||
//
|
||||
// Because the protection offered by SCS relies on the secrecy of the stack
|
||||
// address, storing the address here weakens the protection, but only
|
||||
// slightly, because it is relatively easy for an attacker to discover the
|
||||
|
@ -115,13 +119,22 @@ class pthread_internal_t {
|
|||
// to other allocations), but not the stack itself, which is <0.1% of the size
|
||||
// of the guard region.
|
||||
//
|
||||
// longjmp()/setjmp() don't store all the bits of x18, only the bottom bits
|
||||
// covered by SCS_MASK. Since longjmp()/setjmp() between different threads is
|
||||
// undefined behavior (and unsupported on Android), we can retrieve the high
|
||||
// bits of x18 from the current value in x18 --- all the jmp_buf needs to store
|
||||
// is where exactly the shadow stack pointer is in the thread's shadow stack:
|
||||
// the bottom bits of x18.
|
||||
//
|
||||
// There are at least two other options for discovering the start address of
|
||||
// the guard region on thread exit, but they are not as simple as storing in
|
||||
// TLS.
|
||||
//
|
||||
// 1) Derive it from the value of the x18 register. This is only possible in
|
||||
// processes that do not contain legacy code that might clobber x18,
|
||||
// therefore each process must declare early during process startup whether
|
||||
// it might load legacy code.
|
||||
// TODO: riscv64 has no legacy code, so we can actually go this route there!
|
||||
// 2) Mark the guard region as such using prctl(PR_SET_VMA_ANON_NAME) and
|
||||
// discover its address by reading /proc/self/maps. One issue with this is
|
||||
// that reading /proc/self/maps can race with allocations, so we may need
|
||||
|
|
|
@ -14,18 +14,18 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _BIONIC_CONSTANTS_H_
|
||||
#define _BIONIC_CONSTANTS_H_
|
||||
#pragma once
|
||||
|
||||
#define NS_PER_S 1000000000
|
||||
|
||||
// Size of the shadow call stack. This must be a power of 2.
|
||||
// Size of the shadow call stack. This can be small because these stacks only
|
||||
// contain return addresses. It must be a power of 2 so that SCS_MASK is a contiguous low-bit mask.
|
||||
// See the SCS commentary in pthread_internal.h for more detail.
|
||||
#define SCS_SIZE (8 * 1024)
|
||||
#define SCS_MASK (SCS_SIZE - 1)
|
||||
|
||||
// The shadow call stack is allocated at an aligned address within a guard region of this size. The
|
||||
// guard region must be large enough that we can allocate an SCS_SIZE-aligned SCS while ensuring
|
||||
// that there is at least one guard page after the SCS so that a stack overflow results in a SIGSEGV
|
||||
// instead of corrupting the allocation that comes after it.
|
||||
#define SCS_GUARD_REGION_SIZE (16 * 1024 * 1024)
|
||||
|
||||
#endif // _BIONIC_CONSTANTS_H_
|
||||
|
|
|
@ -66,18 +66,20 @@ cc_defaults {
|
|||
"libcutils_headers",
|
||||
"gwp_asan_headers"
|
||||
],
|
||||
// Ensure that the tests exercise shadow call stack support and
|
||||
// the hint space PAC/BTI instructions.
|
||||
stl: "libc++",
|
||||
|
||||
// Ensure that the tests exercise shadow call stack support.
|
||||
// We don't use `scs: true` here because that would create a second
|
||||
// variant of this library, whereas we just want to say "this
|
||||
// library should always be built this way".
|
||||
arch: {
|
||||
arm64: {
|
||||
cflags: [
|
||||
"-fsanitize=shadow-call-stack",
|
||||
// Disable this option for now: see b/151372823
|
||||
//"-mbranch-protection=standard",
|
||||
],
|
||||
cflags: ["-fsanitize=shadow-call-stack"],
|
||||
},
|
||||
riscv64: {
|
||||
cflags: ["-fsanitize=shadow-call-stack"],
|
||||
},
|
||||
},
|
||||
stl: "libc++",
|
||||
sanitize: {
|
||||
address: false,
|
||||
},
|
||||
|
|
|
@ -14,8 +14,6 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#if __has_feature(shadow_call_stack)
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "private/bionic_constants.h"
|
||||
|
@ -33,7 +31,9 @@ __attribute__((weak, noinline)) int recurse2(int count) {
|
|||
}
|
||||
|
||||
TEST(scs_test, stack_overflow) {
|
||||
#if defined(__aarch64__) || defined(__riscv)
|
||||
ASSERT_EXIT(recurse1(SCS_SIZE), testing::KilledBySignal(SIGSEGV), "");
|
||||
}
|
||||
|
||||
#else
|
||||
GTEST_SKIP() << "no SCS on this architecture";
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue