Add new optimized strlen for arm.
This optimized version is primarily targeted at cortex-a15.
Tested on all nexus devices using the system/extras/libc_test strlen test.
Tested alignments from 1 to 32 that are powers of 2.
Tested that strlen does not cross page boundaries at all alignments.
Speed improvements listed below:
cortex-a15
- Sizes >= 32 bytes, ~75% improvement.
- Sizes >= 1024 bytes, ~250% improvement.
cortex-a9
- Sizes >= 32 bytes, ~75% improvement.
- Sizes >= 1024 bytes, ~85% improvement.
krait
- Sizes >= 32 bytes, ~95% improvement.
- Sizes >= 1024 bytes, ~160% improvement.
Merge from internal master.
(cherry-picked from 2fc0717977)
Change-Id: I1ceceb4e745fd68e9d946f96d1d42e0cdaff6ccf
This commit is contained in:
parent
f35e0c149f
commit
0aa9b52efa
|
@ -15,7 +15,6 @@ _LIBC_ARCH_COMMON_SRC_FILES := \
|
|||
arch-arm/bionic/setjmp.S \
|
||||
arch-arm/bionic/sigsetjmp.S \
|
||||
arch-arm/bionic/strcpy.S \
|
||||
arch-arm/bionic/strlen.c.arm \
|
||||
arch-arm/bionic/syscall.S \
|
||||
arch-arm/bionic/tgkill.S \
|
||||
arch-arm/bionic/tkill.S \
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013 ARM Ltd
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the company may not be used to endorse or promote
|
||||
* products derived from this software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <machine/asm.h>
|
||||
|
||||
.syntax unified
|
||||
|
||||
.thumb
|
||||
.thumb_func
|
||||
|
||||
ENTRY(strlen)
    // strlen: count the bytes that precede the first NUL byte of a string.
    //
    // In:      r0 = address of the first byte of the string
    // Out:     r0 = number of non-NUL bytes before the terminator
    // Scratch: r1 (read cursor, always post-incremented past the data just
    //          loaded), r2/r3 (loaded data), ip (zero-byte mask), flags.
    //
    // Strategy: consume 1, 2 and/or 4 bytes as needed until the cursor is
    // 8-byte aligned, then scan 8 bytes per iteration with ldrd. Every
    // multi-byte load is naturally aligned, so a load never straddles an
    // alignment boundary larger than its own size.
    //
    // Zero-byte detection for a 32-bit word x:
    //     mask = (x - 0x01010101) & ~x & 0x80808080
    // mask is non-zero iff at least one byte of x is zero, and the lowest
    // set bit of mask marks the lowest zero byte. Higher bits may be set
    // spuriously by the borrow, which is why the bytes are tested from
    // byte 0 upwards. (Byte 0 is the lowest-addressed byte only on a
    // little-endian target; this code assumes that.)

    // Prefetch through r0: r1 holds no defined value until the mov below.
    pld     [r0, #0]
    mov     r1, r0                      // r1 = cursor, r0 keeps the start

    rsb     r3, r0, #0
    ands    r3, r3, #7                  // r3 = (-start) & 7 = bytes to 8-byte alignment
    beq     .L_mainloop                 // already aligned

    // Align to a double word (64 bits).
    ands    ip, r3, #1                  // odd distance -> consume one byte
    beq     .L_align_to_32

    ldrb    r2, [r1], #1
    cmp     r2, #0
    beq     .L_update_count_and_return

.L_align_to_32:
    ands    ip, r3, #2                  // consume two bytes to reach 4-byte alignment
    beq     .L_align_to_64

    ldrb    r2, [r1], #1
    cmp     r2, #0
    beq     .L_update_count_and_return
    ldrb    r2, [r1], #1
    cmp     r2, #0
    beq     .L_update_count_and_return

.L_align_to_64:
    ands    ip, r3, #4                  // consume one word to reach 8-byte alignment
    beq     .L_mainloop
    ldr     r3, [r1], #4

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    // The cursor is exactly 4 bytes past the start of this word, which is
    // the same state the main loop is in when the second word holds the NUL.
    bne     .L_zero_in_second_register

.L_mainloop:
    ldrd    r2, r3, [r1], #8            // r2 = first word, r3 = second word

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_zero_in_second_register
    b       .L_mainloop

.L_zero_in_first_register:
    sub     r1, r1, #4                  // rewind so the cursor is just past the first word

.L_zero_in_second_register:
    // Here r1 is 4 bytes past the start of the word containing the NUL and
    // ip is that word's zero-byte mask.
    sub     r0, r1, r0                  // r0 = offset of the end of that word

    // Check for zero in byte 0.
    ands    r1, ip, #0x80
    beq     .L_check_byte1

    sub     r0, r0, #4
    bx      lr

.L_check_byte1:
    // Check for zero in byte 1.
    ands    r1, ip, #0x8000
    beq     .L_check_byte2

    sub     r0, r0, #3
    bx      lr

.L_check_byte2:
    // Check for zero in byte 2.
    ands    r1, ip, #0x800000
    beq     .L_return                   // not bytes 0-2, so the NUL is byte 3

    sub     r0, r0, #2
    bx      lr

.L_update_count_and_return:
    // Byte-at-a-time path: r1 is one past the NUL byte just read.
    sub     r0, r1, r0

.L_return:
    sub     r0, r0, #1
    bx      lr
END(strlen)
|
|
@ -1,5 +1,6 @@
|
|||
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a15/bionic/memcpy.S)
|
||||
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a15/bionic/memset.S)
|
||||
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a15/bionic/strcmp.S)
|
||||
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
|
||||
|
||||
include bionic/libc/arch-arm/generic/generic.mk
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a9/bionic/memcpy.S)
|
||||
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a9/bionic/memset.S)
|
||||
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a9/bionic/strcmp.S)
|
||||
# Use cortex-a15 version of strlen.
|
||||
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
|
||||
|
||||
include bionic/libc/arch-arm/generic/generic.mk
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/generic/bionic/memcpy.S)
|
||||
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/generic/bionic/memset.S)
|
||||
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/generic/bionic/strcmp.S)
|
||||
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/generic/bionic/strlen.c)
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/krait/bionic/memcpy.S)
|
||||
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/krait/bionic/memset.S)
|
||||
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/krait/bionic/strcmp.S)
|
||||
# Use cortex-a15 version of strlen.
|
||||
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
|
||||
|
||||
include bionic/libc/arch-arm/generic/generic.mk
|
||||
|
|
Loading…
Reference in New Issue