Merge "add 32-bit bionic implementation for denver arch"

This commit is contained in:
Christopher Ferris 2014-03-31 03:30:12 +00:00 committed by Gerrit Code Review
commit d8fe15fdb6
7 changed files with 962 additions and 1 deletions

View File

@ -70,7 +70,7 @@ ifeq ($(strip $(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)),)
endif endif
cpu_variant_mk := $(LOCAL_PATH)/arch-arm/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT).mk cpu_variant_mk := $(LOCAL_PATH)/arch-arm/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT).mk
ifeq ($(wildcard $(cpu_variant_mk)),) ifeq ($(wildcard $(cpu_variant_mk)),)
$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.") $(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, denver. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
endif endif
include $(cpu_variant_mk) include $(cpu_variant_mk)
libc_common_additional_dependencies += $(cpu_variant_mk) libc_common_additional_dependencies += $(cpu_variant_mk)

View File

@ -0,0 +1,221 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <private/bionic_asm.h>
#include <private/libc_events.h>
.syntax unified
.thumb
.thumb_func
// __strcat_chk: length-checked string append.
// In:  r0 = dst string, r1 = src string, r2 = limit that
//      strlen(dst) + strlen(src) + 1 must not exceed.
//      NOTE(review): the C prototype is not visible in this file; the roles
//      are taken from how the registers are used below.
//
// Get the length of the src string, then get the length of the dst string.
// Check that the two lengths together don't exceed the threshold, then
// do a memcpy of the data.
//
// The strlen code between .L_strlen_start and .L_finish runs twice: first on
// src (r4 == 0), then on dst (r4 == src length + 1). There is no return
// instruction here: after the final pop, execution runs off the end of this
// function into the code that follows END() (the memcpy_base.S include in
// this file), with {r0, lr} still on the stack.
ENTRY(__strcat_chk)
pld [r0, #0]
// Keep the original dst pointer and the return address on the stack.
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// r4 and r5 carry state across the two strlen passes.
push {r4, r5}
.save {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
// lr = limit from r2; it is compared against the total at .L_strlen_done.
mov lr, r2
// Save the dst register to r5
mov r5, r0
// Zero out r4 (r4 == 0 marks the first strlen pass).
eor r4, r4, r4
// r1 contains the address of the string to count.
// r0 keeps the start of that string while r1 advances.
.L_strlen_start:
mov r0, r1
// r3 = offset of r1 within a double word; zero means already aligned.
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
// r3 = number of bytes (1..7) to examine before r1 is 8-byte aligned.
rsb r3, r3, #8
// Bit 0 of r3 decides Z (result non-zero when set); bit 1 goes to carry.
lsls ip, r3, #31
beq .L_align_to_32
// Odd byte count: examine one byte.
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
// Carry is still the one produced by the lsls above (ldrb and cbz do not
// change the flags).
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
// Examine two more bytes.
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
// Bit 2 of r3 set: one whole word is left before 8-byte alignment.
tst r3, #4
beq .L_mainloop
ldr r3, [r1], #4
// Zero-byte test: (x - 0x01010101) & ~x & 0x80808080 is non-zero exactly
// when some byte of x is zero.
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
// r1 is now 4 past the word, which is what the second-register decode expects.
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
// Examine 8 bytes per iteration: r2 = first word, r3 = second word.
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
// r1 was post-incremented past the zero byte, so length = r1 - r0 - 1.
sub r3, r1, r0
sub r3, r3, #1
b .L_finish
.L_zero_in_first_register:
// r1 is 8 bytes past the start of the double word; the terminator is in the
// first word. ip holds a 0x80 marker in each byte lane flagged by the test;
// the lowest marker is the first zero byte (the byte at the lowest address
// is treated as bits 0-7, i.e. little-endian layout).
sub r3, r1, r0
// Marker in bit 7 -> result non-zero; marker in bit 15 -> carry.
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
// Marker in bit 23 -> result non-zero; otherwise it is the top byte.
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_finish
.L_sub8_and_finish:
sub r3, r3, #8
b .L_finish
.L_sub7_and_finish:
sub r3, r3, #7
b .L_finish
.L_sub6_and_finish:
sub r3, r3, #6
b .L_finish
.L_zero_in_second_register:
// Same decode as above, but the terminator is in the last word loaded, so
// r1 is only 1..4 bytes past it.
sub r3, r1, r0
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_finish
.L_sub4_and_finish:
sub r3, r3, #4
b .L_finish
.L_sub3_and_finish:
sub r3, r3, #3
b .L_finish
.L_sub2_and_finish:
sub r3, r3, #2
.L_finish:
// r3 = length of the string that started at r0.
// r4 != 0 means this was the second (dst) pass.
cmp r4, #0
bne .L_strlen_done
// Time to get the dst string length.
mov r1, r5
// Save the original source address to r5.
mov r5, r0
// Save the current length (adding 1 for the terminator).
add r4, r3, #1
b .L_strlen_start
// r0 holds the pointer to the dst string.
// r3 holds the dst string length.
// r4 holds the src string length + 1.
.L_strlen_done:
// r2 = bytes dst will occupy after the append, terminator included.
add r2, r3, r4
cmp r2, lr
// Unsigned compare: fail when the total is larger than the limit. r4/r5
// and {r0, lr} are still on the stack when the failure path is entered.
bhi __strcat_chk_failed
// Set up the registers for the memcpy code.
// r1 = src, r2 = src length + 1, r0 = address of the dst terminator.
mov r1, r5
pld [r1, #64]
mov r2, r4
add r0, r0, r3
// Only r4/r5 are restored; {r0, lr} pushed at entry stay on the stack for
// the code this function falls through into.
pop {r4, r5}
END(__strcat_chk)
#define MEMCPY_BASE __strcat_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
#include "memcpy_base.S"
// Failure path of __strcat_chk, entered by the bhi after the length check.
// At that point both pushes made by __strcat_chk ({r0, lr} then {r4, r5})
// are still on the stack; the .save/.cfi directives below describe them.
// Nothing is pushed or popped here.
ENTRY_PRIVATE(__strcat_chk_failed)
.save {r0, lr}
.save {r4, r5}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
// r0 = offset of error_string relative to the pc value read at 1: below.
ldr r0, error_message
// r1 = event code.
ldr r1, error_code
1:
// This file is assembled as Thumb (.thumb at the top), where pc reads as the
// address of this instruction + 4; that is the "+4" in error_message.
// After the add, r0 is the absolute address of error_string.
add r0, pc
// NOTE(review): no code follows the call, so __fortify_chk_fail is presumably
// expected not to return -- confirm against its definition.
bl __fortify_chk_fail
// Literal data read by the two ldr instructions above.
error_code:
.word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
END(__strcat_chk_failed)
// Message whose address is placed in r0 for __fortify_chk_fail.
// NOTE(review): nothing in this file writes to the string; it could probably
// live in a read-only section instead of .data -- confirm before changing.
.data
error_string:
.string "strcat: prevented write past end of buffer"

View File

@ -0,0 +1,182 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <private/bionic_asm.h>
#include <private/libc_events.h>
.syntax unified
.thumb
.thumb_func
// __strcpy_chk: length-checked string copy.
// In:  r0 = dst, r1 = src string, r2 = limit; the copy is rejected when
//      strlen(src) >= limit.
//      NOTE(review): the C prototype is not visible in this file; the roles
//      are taken from how the registers are used below.
//
// Get the length of the source string first, then do a memcpy of the data
// instead of a strcpy.
//
// There is no return instruction here: after the size check, execution runs
// off the end of this function into the code that follows END() (the
// memcpy_base.S include in this file), with {r0, lr} still on the stack.
ENTRY(__strcpy_chk)
pld [r0, #0]
// Keep the original dst pointer and the return address on the stack.
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// lr = limit from r2; it is compared against the length at .L_check_size.
mov lr, r2
// r0 is the scanning cursor from here on; r1 stays at the start of src.
mov r0, r1
// r3 = offset of src within a double word; zero means already aligned.
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
// r3 = number of bytes (1..7) to examine before r0 is 8-byte aligned.
rsb r3, r3, #8
// Bit 0 of r3 decides Z (result non-zero when set); bit 1 goes to carry.
lsls ip, r3, #31
beq .L_align_to_32
// Odd byte count: examine one byte.
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
// Carry is still the one produced by the lsls above (ldrb and cbz do not
// change the flags).
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
// Examine two more bytes.
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
// Bit 2 of r3 set: one whole word is left before 8-byte alignment.
tst r3, #4
beq .L_mainloop
ldr r3, [r0], #4
// Zero-byte test: (x - 0x01010101) & ~x & 0x80808080 is non-zero exactly
// when some byte of x is zero.
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
// r0 is now 4 past the word, which is what the second-register decode expects.
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
// Examine 8 bytes per iteration: r2 = first word, r3 = second word.
ldrd r2, r3, [r0], #8
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
// r0 was post-incremented past the zero byte, so length = r0 - r1 - 1.
sub r3, r0, r1
sub r3, r3, #1
b .L_check_size
.L_zero_in_first_register:
// r0 is 8 bytes past the start of the double word; the terminator is in the
// first word. ip holds a 0x80 marker in each byte lane flagged by the test;
// the lowest marker is the first zero byte (the byte at the lowest address
// is treated as bits 0-7, i.e. little-endian layout).
sub r3, r0, r1
// Marker in bit 7 -> result non-zero; marker in bit 15 -> carry.
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
// Marker in bit 23 -> result non-zero; otherwise it is the top byte.
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_check_size
.L_sub8_and_finish:
sub r3, r3, #8
b .L_check_size
.L_sub7_and_finish:
sub r3, r3, #7
b .L_check_size
.L_sub6_and_finish:
sub r3, r3, #6
b .L_check_size
.L_zero_in_second_register:
// Same decode as above, but the terminator is in the last word loaded, so
// r0 is only 1..4 bytes past it.
sub r3, r0, r1
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_check_size
.L_sub4_and_finish:
sub r3, r3, #4
b .L_check_size
.L_sub3_and_finish:
sub r3, r3, #3
b .L_check_size
.L_sub2_and_finish:
sub r3, r3, #2
.L_check_size:
// r3 = length of src (terminator not counted); r1 = start of src.
pld [r1, #0]
pld [r1, #64]
// Reload the original dst that was pushed at entry (it stays on the stack).
ldr r0, [sp]
cmp r3, lr
// Unsigned compare: fail when length >= limit, i.e. when there is no room
// for the string plus its terminator.
bhs __strcpy_chk_failed
// Add 1 for copy length to get the string terminator.
add r2, r3, #1
END(__strcpy_chk)
#define MEMCPY_BASE __strcpy_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
#include "memcpy_base.S"
// Failure path of __strcpy_chk, entered by the bhs after the size check.
// The {r0, lr} pair pushed by __strcpy_chk is still on the stack at that
// point; the .save/.cfi directives below describe it. Nothing is pushed or
// popped here.
ENTRY_PRIVATE(__strcpy_chk_failed)
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// r0 = offset of error_string relative to the pc value read at 1: below.
ldr r0, error_message
// r1 = event code.
ldr r1, error_code
1:
// This file is assembled as Thumb (.thumb at the top), where pc reads as the
// address of this instruction + 4; that is the "+4" in error_message.
// After the add, r0 is the absolute address of error_string.
add r0, pc
// NOTE(review): no code follows the call, so __fortify_chk_fail is presumably
// expected not to return -- confirm against its definition.
bl __fortify_chk_fail
// Literal data read by the two ldr instructions above.
error_code:
.word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
END(__strcpy_chk_failed)
// Message whose address is placed in r0 for __fortify_chk_fail.
// NOTE(review): nothing in this file writes to the string; it could probably
// live in a read-only section instead of .data -- confirm before changing.
.data
error_string:
.string "strcpy: prevented write past end of buffer"

View File

@ -0,0 +1,105 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Prototype: void *memcpy (void *dst, const void *src, size_t count).
#include <private/bionic_asm.h>
#include <private/libc_events.h>
.text
.syntax unified
.fpu neon
// __memcpy_chk: checked entry point placed directly in front of memcpy.
// In: r0, r1, r2 are left untouched for memcpy; r3 is the limit that the
//     byte count in r2 is compared against.
ENTRY(__memcpy_chk)
cmp r2, r3
// Unsigned compare: count larger than the limit -> report and abort path.
bhi __memcpy_chk_fail
// Fall through to memcpy...
END(__memcpy_chk)
// memcpy entry: r0 = dst, r1 = src, r2 = byte count (see the prototype
// comment near the top of this file).
// Only the prologue lives here: it pushes {r0, lr} and then runs off the end
// into the MEMCPY_BASE code included right after END(memcpy), whose exits
// pop {r0, pc}.
ENTRY(memcpy)
pld [r1, #64]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
END(memcpy)
#define MEMCPY_BASE __memcpy_base
#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
#include "memcpy_base.S"
// Failure path of __memcpy_chk, entered when the byte count exceeds the
// limit. Nothing has been pushed by __memcpy_chk at that point.
ENTRY_PRIVATE(__memcpy_chk_fail)
// Preserve lr for backtrace.
push {lr}
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
// r0 = offset of error_string relative to the pc value read at 1: below.
ldr r0, error_message
// r1 = event code.
ldr r1, error_code
1:
// The "+8" in error_message matches the ARM-state rule that pc reads as the
// address of the current instruction + 8.
// NOTE(review): this relies on the file being assembled as ARM (no .thumb
// directive is present) -- confirm the build does not default to Thumb.
add r0, pc
// NOTE(review): no code follows the call, so __fortify_chk_fail is presumably
// expected not to return -- confirm against its definition.
bl __fortify_chk_fail
// Literal data read by the two ldr instructions above.
error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
END(__memcpy_chk_fail)
// Message whose address is placed in r0 for __fortify_chk_fail.
// NOTE(review): nothing in this file writes to the string; it could probably
// live in a read-only section instead of .data -- confirm before changing.
.data
error_string:
.string "memcpy: prevented write past end of buffer"

View File

@ -0,0 +1,234 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
* Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#define CACHE_LINE_SIZE (64)
#define PREFETCH_DISTANCE (CACHE_LINE_SIZE*6)
// MEMCPY_BASE: shared copy body. The including file defines MEMCPY_BASE to
// the symbol name it wants before including this file.
// In:  r0 = dst, r1 = src, r2 = byte count.
//      A saved {r0, lr} pair must already be on the stack (described by the
//      .cfi directives below); every exit is "pop {r0, pc}".
// Out: returns to the saved lr with r0 reloaded from the stack.
// Scratch: r3, ip, q0, q1.
ENTRY_PRIVATE(MEMCPY_BASE)
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// Nothing to do for a zero count or when src and dst are the same address.
cmp r2, #0
beq .L_memcpy_done
cmp r0, r1
beq .L_memcpy_done
/* preload next cache line */
pld [r1, #CACHE_LINE_SIZE*1]
/* Deal with very small blocks (< 32bytes) asap */
cmp r2, #32
blo .L_memcpy_lt_32bytes
/* no need to align if len < 128 bytes */
cmp r2, #128
blo .L_memcpy_lt_128bytes
/* large copy, align dest to 64 byte boundary */
pld [r1, #CACHE_LINE_SIZE*2]
// r3 = (-dst) & 63 = bytes needed to reach the next 64-byte boundary.
rsb r3, r0, #0
ands r3, r3, #0x3F
pld [r1, #CACHE_LINE_SIZE*3]
// Flags are still those of the ands (pld does not change them).
beq .L_memcpy_dispatch
sub r2, r2, r3
// Each "movs ip, r3, lsl #n" below moves two bits of r3 into N and C so
// that two copy sizes can be tested per shift.
/* copy 1 byte */
movs ip, r3, lsl #31
itt mi
ldrbmi ip, [r1], #1
strbmi ip, [r0], #1
/* copy 2 bytes */
itt cs
ldrhcs ip, [r1], #2
strhcs ip, [r0], #2
/* copy 4 bytes */
movs ip, r3, lsl #29
itt mi
ldrmi ip, [r1], #4
strmi ip, [r0], #4
/* copy 8 bytes */
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
1: /* copy 16 bytes */
movs ip, r3, lsl #27
bpl 1f
vld1.8 {q0}, [r1]!
vst1.8 {q0}, [r0, :128]!
1: /* copy 32 bytes */
bcc .L_memcpy_dispatch
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0, :256]!
.L_memcpy_dispatch:
// dst (r0) is 64-byte aligned from here on.
// pre-decrement by 128 to detect nearly-done condition easily, but
// also need to check if we have less than 128 bytes left at this
// point due to alignment code above
subs r2, r2, #128
blo .L_memcpy_lt_128presub
// Denver does better if both source and dest are aligned so
// we'll special-case that even though the code is virtually identical
tst r1, #0xF
bne .L_memcpy_neon_unalign_src_pld
// DRAM memcpy should be throttled slightly to get full bandwidth
//
// r2 has already been reduced by 128 here; copies with more than 32768
// bytes still outstanding take the loop without source alignment hints.
cmp r2, #32768
bhi .L_memcpy_neon_unalign_src_pld
.align 4
1:
// Loop used when src is 16-byte aligned: loads carry a :128 hint.
/* copy 128 bytes in each loop */
subs r2, r2, #128
/* preload a cache line */
pld [r1, #PREFETCH_DISTANCE]
/* copy a cache line */
vld1.8 {q0, q1}, [r1, :128]!
vst1.8 {q0, q1}, [r0, :256]!
vld1.8 {q0, q1}, [r1, :128]!
vst1.8 {q0, q1}, [r0, :256]!
/* preload a cache line */
pld [r1, #PREFETCH_DISTANCE]
/* copy a cache line */
vld1.8 {q0, q1}, [r1, :128]!
vst1.8 {q0, q1}, [r0, :256]!
vld1.8 {q0, q1}, [r1, :128]!
vst1.8 {q0, q1}, [r0, :256]!
// Flags are still those of the subs at the top of the loop.
bhs 1b
// Undo the bias: r2 = bytes left (0..127).
adds r2, r2, #128
bne .L_memcpy_lt_128bytes_align
pop {r0, pc}
.align 4
.L_memcpy_neon_unalign_src_pld:
1:
// Same loop without alignment hints on the loads.
/* copy 128 bytes in each loop */
subs r2, r2, #128
/* preload a cache line */
pld [r1, #PREFETCH_DISTANCE]
/* copy a cache line */
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0, :256]!
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0, :256]!
/* preload a cache line */
pld [r1, #PREFETCH_DISTANCE]
/* copy a cache line */
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0, :256]!
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0, :256]!
// Flags are still those of the subs at the top of the loop.
bhs 1b
// Undo the bias: r2 = bytes left (0..127).
adds r2, r2, #128
bne .L_memcpy_lt_128bytes_align
pop {r0, pc}
.L_memcpy_lt_128presub:
// Fewer than 128 bytes were left after alignment; undo the pre-decrement.
add r2, r2, #128
.L_memcpy_lt_128bytes_align:
// Tail copy (r2 < 128) for the case where dst was 64-byte aligned above and
// has only advanced in 128-byte steps since, so the stores keep their
// alignment hints. Each shift puts two bits of r2 into C and N.
/* copy 64 bytes */
movs ip, r2, lsl #26
bcc 1f
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0, :256]!
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0, :256]!
1: /* copy 32 bytes */
bpl 1f
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0, :256]!
1: /* copy 16 bytes */
movs ip, r2, lsl #28
bcc 1f
vld1.8 {q0}, [r1]!
vst1.8 {q0}, [r0, :128]!
1: /* copy 8 bytes */
bpl 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
1: /* copy 4 bytes */
tst r2, #4
itt ne
ldrne ip, [r1], #4
strne ip, [r0], #4
/* copy 2 bytes */
movs ip, r2, lsl #31
itt cs
ldrhcs ip, [r1], #2
strhcs ip, [r0], #2
/* copy 1 byte */
itt mi
ldrbmi ip, [r1]
strbmi ip, [r0]
pop {r0, pc}
.L_memcpy_lt_128bytes:
// Copies of 32..127 bytes taken straight from the entry checks: dst was
// never aligned, so the stores carry no alignment hints.
/* copy 64 bytes */
movs ip, r2, lsl #26
bcc 1f
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0]!
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0]!
1: /* copy 32 bytes */
bpl .L_memcpy_lt_32bytes
vld1.8 {q0, q1}, [r1]!
vst1.8 {q0, q1}, [r0]!
.L_memcpy_lt_32bytes:
// Also entered directly from the top for counts of 1..31 bytes.
/* copy 16 bytes */
movs ip, r2, lsl #28
bcc 1f
vld1.8 {q0}, [r1]!
vst1.8 {q0}, [r0]!
1: /* copy 8 bytes */
bpl 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: /* copy 4 bytes */
tst r2, #4
itt ne
ldrne ip, [r1], #4
strne ip, [r0], #4
/* copy 2 bytes */
movs ip, r2, lsl #31
itt cs
ldrhcs ip, [r1], #2
strhcs ip, [r0], #2
/* copy 1 byte */
itt mi
ldrbmi ip, [r1]
strbmi ip, [r0]
.L_memcpy_done:
// Reload the original dst into r0 and return through the saved lr.
pop {r0, pc}
END(MEMCPY_BASE)

View File

@ -0,0 +1,207 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/cpu-features.h>
#include <private/bionic_asm.h>
#include <private/libc_events.h>
/*
* Optimized memset() for ARM.
*
* memset() returns its first argument.
*/
.fpu neon
.syntax unified
// __memset_chk: checked entry point in front of memset.
// In: r0, r1, r2 are left untouched for memset; r3 is the limit that the
//     byte count in r2 is compared against.
// When the count fits, control continues at .L_done (inside bzero), which
// falls through to memset. Otherwise the overflow is reported through
// __fortify_chk_fail.
ENTRY(__memset_chk)
cmp r2, r3
// Unsigned compare: r2 <= r3 means the fill stays within the limit.
bls .L_done
// Preserve lr for backtrace.
push {lr}
// Record the push in the ARM unwind tables too, not only in the CFI below;
// the other pushes in this commit (e.g. __memcpy_chk_fail) pair push with
// .save in the same way.
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
// r0 = offset of error_string relative to the pc value read at 1: below.
ldr r0, error_message
// r1 = event code.
ldr r1, error_code
1:
// The "+8" in error_message matches the ARM-state rule that pc reads as the
// address of the current instruction + 8.
// NOTE(review): this relies on the file being assembled as ARM (no .thumb
// directive is present) -- confirm the build does not default to Thumb.
add r0, pc
// NOTE(review): no code follows the call, so __fortify_chk_fail is presumably
// expected not to return -- confirm against its definition.
bl __fortify_chk_fail
// Literal data read by the two ldr instructions above.
error_code:
.word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
END(__memset_chk)
// bzero: r0 = dst, r1 = byte count.
// Rearranges the arguments into memset form (r1 = 0 as the fill value,
// r2 = count) and falls through into memset, which follows END(bzero).
ENTRY(bzero)
mov r2, r1
mov r1, #0
// .L_done is also the target of the in-range branch in __memset_chk.
.L_done:
// Fall through to memset...
END(bzero)
// memset: r0 = dst, r1 = fill value (only the low byte is used),
// r2 = byte count.
// r0 is never written, so it still holds dst at each "bx lr".
// Scratch: r1 (replicated fill), r2, r3 (advancing pointer), ip, q0-q3.
ENTRY(memset)
pldw [r0]
mov r3, r0
// Duplicate the low byte of r1
// (byte -> top byte, then top halfword, then all four bytes).
mov r1, r1, lsl #24
orr r1, r1, r1, lsr #8
orr r1, r1, r1, lsr #16
cmp r2, #16
blo .L_less_than_16
// This section handles regions 16 bytes or larger
//
// Use aligned vst1.8 and vstm when possible. Register values will be:
// ip is scratch
// q0, q1, and r1 contain the memset value
// r2 is the number of bytes to set
// r3 is the advancing destination pointer
vdup.32 q0, r1
ands ip, r3, 0xF
beq .L_memset_aligned
// Align dest pointer to 16-byte boundary.
pldw [r0, #64]
// ip = bytes (1..15) needed to reach the boundary.
rsb ip, ip, #16
// Pre-adjust the byte count to reflect post-alignment value. Expecting
// 8-byte alignment to be rather common so we special case that one.
sub r2, r2, ip
/* set 1 byte */
tst ip, #1
it ne
strbne r1, [r3], #1
/* set 2 bytes */
tst ip, #2
it ne
strhne r1, [r3], #2
/* set 4 bytes */
// Bit 2 of ip goes to N and bit 3 to carry, so both remaining sizes are
// decided by this one shift.
movs ip, ip, lsl #29
it mi
strmi r1, [r3], #4
/* set 8 bytes */
itt cs
strcs r1, [r3], #4
strcs r1, [r3], #4
.L_memset_aligned:
// Destination is now 16-byte aligned. Determine how to handle
// remaining bytes.
vmov q1, q0
cmp r2, #128
blo .L_less_than_128
// We need to set a larger block of memory. Use four Q regs to
// set a full cache line in one instruction. Pre-decrement
// r2 to simplify end-of-loop detection
vmov q2, q0
vmov q3, q0
pldw [r0, #128]
sub r2, r2, #128
.align 4
.L_memset_loop_128:
// Two 64-byte vstm stores per iteration = 128 bytes.
pldw [r3, #192]
vstm r3!, {q0, q1, q2, q3}
vstm r3!, {q0, q1, q2, q3}
subs r2, r2, #128
bhs .L_memset_loop_128
// Un-bias r2 so it contains the number of bytes left. Early
// exit if we are done.
adds r2, r2, #128
beq 2f
.align 4
.L_less_than_128:
// r2 < 128 here. Each "movs ip, r2, lsl #n" puts one size bit in carry and
// the next lower one in N; Z is set when no lower size bits remain, which
// the "beq 2f" exits use (the stores do not change the flags).
// set 64 bytes
movs ip, r2, lsl #26
bcc 1f
vst1.8 {q0, q1}, [r3, :128]!
vst1.8 {q0, q1}, [r3, :128]!
beq 2f
1:
// set 32 bytes
bpl 1f
vst1.8 {q0, q1}, [r3, :128]!
1:
// set 16 bytes
movs ip, r2, lsl #28
bcc 1f
vst1.8 {q0}, [r3, :128]!
beq 2f
1:
// set 8 bytes
bpl 1f
vst1.8 {d0}, [r3, :64]!
1:
// set 4 bytes
tst r2, #4
it ne
strne r1, [r3], #4
1:
// set 2 bytes
movs ip, r2, lsl #31
it cs
strhcs r1, [r3], #2
// set 1 byte
it mi
strbmi r1, [r3]
2:
bx lr
.L_less_than_16:
// Store up to 15 bytes without worrying about byte alignment
// Bit 3 of r2 -> carry (8 bytes), bit 2 -> N (4 bytes); Z means bits 0-2
// are all clear, i.e. nothing is left after the 8-byte store.
movs ip, r2, lsl #29
bcc 1f
str r1, [r3], #4
str r1, [r3], #4
beq 2f
1:
it mi
strmi r1, [r3], #4
// Bit 0 of r2 -> N (1 byte), bit 1 -> carry (2 bytes, stored bytewise).
movs ip, r2, lsl #31
it mi
strbmi r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3]
2:
bx lr
END(memset)
// Message referenced (through error_message) by the overflow path of
// __memset_chk earlier in this file.
// NOTE(review): nothing in this file writes to the string; it could probably
// live in a read-only section instead of .data -- confirm before changing.
.data
error_string:
.string "memset: prevented write past end of buffer"

View File

@ -0,0 +1,12 @@
# Denver-specific implementations.
libc_bionic_src_files_arm += arch-arm/denver/bionic/memcpy.S
libc_bionic_src_files_arm += arch-arm/denver/bionic/memset.S
libc_bionic_src_files_arm += arch-arm/denver/bionic/__strcat_chk.S
libc_bionic_src_files_arm += arch-arm/denver/bionic/__strcpy_chk.S
# Use cortex-a15 versions of strcat/strcpy/strlen/strcmp.
libc_bionic_src_files_arm += arch-arm/cortex-a15/bionic/strcat.S
libc_bionic_src_files_arm += arch-arm/cortex-a15/bionic/strcpy.S
libc_bionic_src_files_arm += arch-arm/cortex-a15/bionic/strlen.S
libc_bionic_src_files_arm += arch-arm/cortex-a15/bionic/strcmp.S