android_bionic/libc/arch-x86/bionic/clone.S

104 lines
2.5 KiB
ArmAsm
Raw Normal View History

#include <machine/asm.h>
#include <sys/linux-syscalls.h>
/*
 * int __pthread_clone(int (*fn)(void*), void* tls, int flags, void* arg);
 *
 * Target: 32-bit x86, AT&T syntax, arguments passed on the stack.
 *
 * Issues the clone system call (int $0x80 with __NR_clone) with:
 *   ebx = flags
 *   ecx = child stack pointer, derived from the tls argument (see below)
 *
 * Parent: returns the non-negative syscall result in eax, or -1 after
 *         handing the negated syscall result to __set_errno.
 * Child:  calls __thread_entry; the three words stored on the child stack
 *         below become its stack arguments (fn, arg, tls). A hlt follows
 *         in case __thread_entry ever returns.
 *
 * The tls argument is rounded down to a 16-byte boundary (call that value
 * ECX) and used as the base of the child stack. The words below ECX are
 * laid out so that, once the child executes the call instruction, the
 * callee sees this frame:
 *
 *      --------------  <-- ESP (ECX - 20)
 *      |  ret EIP   |
 *      --------------  <-- ECX - 16   (child esp handed to the kernel)
 *      |  arg0: fn  |
 *      --------------  <-- ECX - 12
 *      |  arg1: arg |
 *      --------------  <-- ECX - 8
 *      |  arg2: tls |
 *      --------------  <-- ECX - 4
 *      |  unused    |
 *      --------------  <-- ECX (16-byte boundary)
 */
ENTRY(__pthread_clone)
        pushl   %ebx
        pushl   %ecx

        // With two registers pushed and the return address on the stack:
        //   12(%esp) = fn, 16(%esp) = tls, 20(%esp) = flags, 24(%esp) = arg
        movl    16(%esp), %ecx
        movl    %ecx, %ebx              // keep the unaligned tls value
        andl    $~15, %ecx              // 16-byte alignment on child stack

        // insert arguments onto the child stack
        movl    12(%esp), %eax
        movl    %eax, -16(%ecx)         // arg0 = fn
        movl    24(%esp), %eax
        movl    %eax, -12(%ecx)         // arg1 = arg
        movl    %ebx, -8(%ecx)          // arg2 = tls (original, unaligned)

        subl    $16, %ecx               // ecx = child esp, points at arg0
        movl    20(%esp), %ebx          // ebx = flags
        movl    $__NR_clone, %eax
        int     $0x80
        test    %eax, %eax
        jns     1f

        // An error occurred: set errno and return -1.
        // __set_errno is assumed to be a regular C function that takes its
        // int argument on the stack, so the value must be pushed; calling
        // it with the value only in eax makes it read the saved ecx slot.
        negl    %eax
        pushl   %eax
        call    __set_errno
        addl    $4, %esp                // drop the argument again
        orl     $-1, %eax
        jmp     2f

1:
        // Flags are still those of the test above: non-zero eax means we
        // are the parent and simply return the syscall result.
        jnz     2f

        // We are in the child thread now: call __thread_entry with the
        // arguments that were already placed on the child stack.
        call    __thread_entry
        hlt

2:
        popl    %ecx
        popl    %ebx
        ret
END(__pthread_clone)
/*
 * int  __bionic_clone(unsigned long clone_flags,
 *                     void*         newsp,
 *                     int           *parent_tidptr,
 *                     void          *new_tls,
 *                     int           *child_tidptr,
 *                     int           (*fn)(void *),
 *                     void          *arg);
 *
 * Target: 32-bit x86, AT&T syntax, arguments passed on the stack.
 *
 * Issues the clone system call (int $0x80 with __NR_clone). The 32-bit x86
 * kernel entry takes its arguments in this register order:
 *   ebx = clone_flags
 *   ecx = child stack pointer (newsp rounded down to 16 bytes, minus 16)
 *   edx = parent_tidptr
 *   esi = new_tls
 *   edi = child_tidptr
 *
 * Parent: returns the non-negative syscall result in eax, or -1 after
 *         handing the negated syscall result to __set_errno.
 * Child:  calls __bionic_clone_entry; fn and arg were stored on the child
 *         stack beforehand and become its first two stack arguments.
 *         A hlt follows in case __bionic_clone_entry ever returns.
 *
 * NOTE(review): newsp is dereferenced unconditionally (two words are
 * written just below it), so callers must pass a valid child stack.
 */
ENTRY(__bionic_clone)
        pushl   %ebx
        pushl   %esi
        pushl   %edi

        // With three registers pushed and the return address on the stack:
        //   16(%esp) = clone_flags   20(%esp) = newsp
        //   24(%esp) = parent_tidptr 28(%esp) = new_tls
        //   32(%esp) = child_tidptr  36(%esp) = fn      40(%esp) = arg

        // insert arguments onto the child stack
        movl    20(%esp), %ecx
        andl    $~15, %ecx              // 16-byte alignment on child stack
        movl    36(%esp), %eax
        movl    %eax, -16(%ecx)         // arg0 = fn
        movl    40(%esp), %eax
        movl    %eax, -12(%ecx)         // arg1 = arg
        subl    $16, %ecx               // ecx = child esp, points at arg0

        // Load the remaining syscall arguments. On 32-bit x86 the tls
        // pointer goes in esi and child_tidptr in edi (the last two clone
        // arguments are reversed relative to x86-64).
        movl    16(%esp), %ebx          // clone_flags
        movl    24(%esp), %edx          // parent_tidptr
        movl    28(%esp), %esi          // new_tls
        movl    32(%esp), %edi          // child_tidptr
        movl    $__NR_clone, %eax
        int     $0x80
        test    %eax, %eax
        jns     1f

        // An error occurred: set errno and return -1.
        // __set_errno is assumed to be a regular C function that takes its
        // int argument on the stack, so the value must be pushed; calling
        // it with the value only in eax makes it read the saved edi slot.
        negl    %eax
        pushl   %eax
        call    __set_errno
        addl    $4, %esp                // drop the argument again
        orl     $-1, %eax
        jmp     2f

1:
        // Flags are still those of the test above: non-zero eax means we
        // are the parent and simply return the syscall result.
        jnz     2f

        // We are in the child now: call __bionic_clone_entry with the
        // arguments that were already placed on the child stack.
        call    __bionic_clone_entry
        hlt

2:
        popl    %edi
        popl    %esi
        popl    %ebx
        ret
END(__bionic_clone)