Remove pushes from memsets (krait/cortex-a9).
On the path that only uses r0 in both the krait and cortex-a9 memset, remove the push and use r3 as the working pointer instead, leaving r0 untouched. In addition, for cortex-a9, remove the artificial function: it is no longer needed now that DWARF unwinding is supported on ARM. Change-Id: Ia4ed1cc435b03627a7193215e76c8ea3335f949a
This commit is contained in:
parent
adc5795fb7
commit
8264cbba7e
|
@ -69,12 +69,9 @@ END(bzero)
|
|||
ENTRY(memset)
|
||||
// The neon memset only wins for less than 132.
|
||||
cmp r2, #132
|
||||
bhi __memset_large_copy
|
||||
|
||||
stmfd sp!, {r0}
|
||||
.cfi_def_cfa_offset 4
|
||||
.cfi_rel_offset r0, 0
|
||||
bhi .L_memset_large_copy
|
||||
|
||||
mov r3, r0
|
||||
vdup.8 q0, r1
|
||||
|
||||
/* make sure we have at least 32 bytes to write */
|
||||
|
@ -84,7 +81,7 @@ ENTRY(memset)
|
|||
|
||||
1: /* The main loop writes 32 bytes at a time */
|
||||
subs r2, r2, #32
|
||||
vst1.8 {d0 - d3}, [r0]!
|
||||
vst1.8 {d0 - d3}, [r3]!
|
||||
bhs 1b
|
||||
|
||||
2: /* less than 32 left */
|
||||
|
@ -93,22 +90,20 @@ ENTRY(memset)
|
|||
beq 3f
|
||||
|
||||
// writes 16 bytes, 128-bits aligned
|
||||
vst1.8 {d0, d1}, [r0]!
|
||||
vst1.8 {d0, d1}, [r3]!
|
||||
3: /* write up to 15-bytes (count in r2) */
|
||||
movs ip, r2, lsl #29
|
||||
bcc 1f
|
||||
vst1.8 {d0}, [r0]!
|
||||
vst1.8 {d0}, [r3]!
|
||||
1: bge 2f
|
||||
vst1.32 {d0[0]}, [r0]!
|
||||
vst1.32 {d0[0]}, [r3]!
|
||||
2: movs ip, r2, lsl #31
|
||||
strbmi r1, [r0], #1
|
||||
strbcs r1, [r0], #1
|
||||
strbcs r1, [r0], #1
|
||||
ldmfd sp!, {r0}
|
||||
strbmi r1, [r3], #1
|
||||
strbcs r1, [r3], #1
|
||||
strbcs r1, [r3], #1
|
||||
bx lr
|
||||
END(memset)
|
||||
|
||||
ENTRY_PRIVATE(__memset_large_copy)
|
||||
.L_memset_large_copy:
|
||||
/* compute the offset to align the destination
|
||||
* offset = (4-(src&3))&3 = -src & 3
|
||||
*/
|
||||
|
@ -180,7 +175,7 @@ ENTRY_PRIVATE(__memset_large_copy)
|
|||
movs r2, r2, lsl #2
|
||||
strbcs r1, [r0]
|
||||
ldmfd sp!, {r0, r4-r7, pc}
|
||||
END(__memset_large_copy)
|
||||
END(memset)
|
||||
|
||||
.data
|
||||
error_string:
|
||||
|
|
|
@ -69,10 +69,7 @@ END(bzero)
|
|||
|
||||
/* memset() returns its first argument. */
|
||||
ENTRY(memset)
|
||||
stmfd sp!, {r0}
|
||||
.cfi_def_cfa_offset 4
|
||||
.cfi_rel_offset r0, 0
|
||||
|
||||
mov r3, r0
|
||||
vdup.8 q0, r1
|
||||
|
||||
/* make sure we have at least 32 bytes to write */
|
||||
|
@ -82,7 +79,7 @@ ENTRY(memset)
|
|||
|
||||
1: /* The main loop writes 32 bytes at a time */
|
||||
subs r2, r2, #32
|
||||
vst1.8 {d0 - d3}, [r0]!
|
||||
vst1.8 {d0 - d3}, [r3]!
|
||||
bhs 1b
|
||||
|
||||
2: /* less than 32 left */
|
||||
|
@ -91,18 +88,17 @@ ENTRY(memset)
|
|||
beq 3f
|
||||
|
||||
// writes 16 bytes, 128-bits aligned
|
||||
vst1.8 {d0, d1}, [r0]!
|
||||
vst1.8 {d0, d1}, [r3]!
|
||||
3: /* write up to 15-bytes (count in r2) */
|
||||
movs ip, r2, lsl #29
|
||||
bcc 1f
|
||||
vst1.8 {d0}, [r0]!
|
||||
vst1.8 {d0}, [r3]!
|
||||
1: bge 2f
|
||||
vst1.32 {d0[0]}, [r0]!
|
||||
vst1.32 {d0[0]}, [r3]!
|
||||
2: movs ip, r2, lsl #31
|
||||
strbmi r1, [r0], #1
|
||||
strbcs r1, [r0], #1
|
||||
strbcs r1, [r0], #1
|
||||
ldmfd sp!, {r0}
|
||||
strbmi r1, [r3], #1
|
||||
strbcs r1, [r3], #1
|
||||
strbcs r1, [r3], #1
|
||||
bx lr
|
||||
END(memset)
|
||||
|
||||
|
|
Loading…
Reference in New Issue