Switch to the arm-optimized-routines string routines on aarch64 where possible.

This includes optimized strrchr and strchrnul routines, and an MTE-compatible
strlen routine.

Bug: 135772972
Change-Id: I48499f757cdc6d3e77e5649123d45b17dfa3c6b0

parent fcbdba22ab
commit 337a5b3f9a
@@ -777,6 +777,7 @@ cc_library_static {
        "bionic/android_set_abort_message.cpp",
        "bionic/strchr.cpp",
        "bionic/strchrnul.cpp",
        "bionic/strnlen.c",
        "bionic/strrchr.cpp",
    ],
@@ -837,25 +838,17 @@ cc_library_static {
        },
        arm64: {
            srcs: [
                "arch-arm64/generic/bionic/memcmp.S",
                "arch-arm64/generic/bionic/memcpy.S",
                "arch-arm64/generic/bionic/memmove.S",
                "arch-arm64/generic/bionic/memset.S",
                "arch-arm64/generic/bionic/stpcpy.S",
                "arch-arm64/generic/bionic/strcpy.S",
                "arch-arm64/generic/bionic/wmemmove.S",

                "arch-arm64/default/bionic/memchr.S",
                "arch-arm64/default/bionic/strchr.S",
                "arch-arm64/default/bionic/strcmp.S",
                "arch-arm64/default/bionic/strlen.S",
                "arch-arm64/default/bionic/strncmp.S",
                "arch-arm64/default/bionic/strnlen.S",

                "arch-arm64/mte/bionic/memchr.c",
                "arch-arm64/mte/bionic/strchr.cpp",
                "arch-arm64/mte/bionic/stpcpy.S",
                "arch-arm64/mte/bionic/strchrnul.cpp",
                "arch-arm64/mte/bionic/strrchr.cpp",
                "arch-arm64/mte/bionic/strcmp.c",
                "arch-arm64/mte/bionic/strlen.c",
                "arch-arm64/mte/bionic/strcpy.S",
                "arch-arm64/mte/bionic/strncmp.c",
                "arch-arm64/mte/bionic/strnlen.c",
@@ -868,7 +861,9 @@ cc_library_static {
            exclude_srcs: [
                "bionic/__memcpy_chk.cpp",
                "bionic/strchr.cpp",
                "bionic/strchrnul.cpp",
                "bionic/strnlen.c",
                "bionic/strrchr.cpp",
            ],
        },
@@ -1121,7 +1116,6 @@ cc_library_static {
        "bionic/spawn.cpp",
        "bionic/stat.cpp",
        "bionic/stdlib_l.cpp",
        "bionic/strchrnul.cpp",
        "bionic/strerror.cpp",
        "bionic/string_l.cpp",
        "bionic/strings_l.cpp",
@@ -1358,6 +1352,7 @@ cc_library_static {

    whole_static_libs: [
        "gwp_asan",
        "libarm-optimized-routines-string",
        "libc_bionic_ndk",
        "libc_bootstrap",
        "libc_fortify",
@@ -1388,6 +1383,7 @@ cc_library_static {
    name: "libc_nopthread",

    whole_static_libs: [
        "libarm-optimized-routines-string",
        "libc_bionic",
        "libc_bionic_ndk",
        "libc_bootstrap",
@@ -1,164 +0,0 @@
/*
 * Copyright (c) 2014, ARM Limited
 * All rights Reserved.
 * Copyright (c) 2014, Linaro Ltd.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the company nor the names of its contributors
 *       may be used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include <private/bionic_asm.h>

/* Arguments and results. */
#define srcin   x0
#define chrin   w1
#define cntin   x2

#define result  x0

#define src     x3
#define tmp     x4
#define wtmp2   w5
#define synd    x6
#define soff    x9
#define cntrem  x10

#define vrepchr   v0
#define vdata1    v1
#define vdata2    v2
#define vhas_chr1 v3
#define vhas_chr2 v4
#define vrepmask  v5
#define vend      v6

/*
 * Core algorithm:
 *
 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
 * requested character and bit 1 is not used (faster than using a 32-bit
 * syndrome). Since the bits in the syndrome reflect exactly the order in which
 * things occur in the original string, counting trailing zeros allows us to
 * identify exactly which byte has matched.
 */

ENTRY(memchr_default)
        /*
         * Magic constant 0x40100401 allows us to identify which lane matches
         * the requested byte.
         */
        cbz     cntin, .Lzero_length
        mov     wtmp2, #0x0401
        movk    wtmp2, #0x4010, lsl #16
        dup     vrepchr.16b, chrin
        /* Work with aligned 32-byte chunks */
        bic     src, srcin, #31
        dup     vrepmask.4s, wtmp2
        ands    soff, srcin, #31
        and     cntrem, cntin, #31
        b.eq    .Lloop

        /*
         * Input string is not 32-byte aligned. We calculate the syndrome
         * value for the aligned 32-byte block containing the first bytes
         * and mask the irrelevant part.
         */
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        sub     tmp, soff, #32
        adds    cntin, cntin, tmp
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        and     vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
        and     vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
        addp    vend.16b, vhas_chr1.16b, vhas_chr2.16b  /* 256->128 */
        addp    vend.16b, vend.16b, vend.16b            /* 128->64 */
        mov     synd, vend.d[0]
        /* Clear the soff*2 lower bits */
        lsl     tmp, soff, #1
        lsr     synd, synd, tmp
        lsl     synd, synd, tmp
        /* The first block can also be the last */
        b.ls    .Lmasklast
        /* Have we found something already? */
        cbnz    synd, .Ltail

.Lloop:
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        subs    cntin, cntin, #32
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        /* If we're out of data we finish regardless of the result */
        b.ls    .Lend
        /* Use a fast check for the termination condition */
        orr     vend.16b, vhas_chr1.16b, vhas_chr2.16b
        addp    vend.2d, vend.2d, vend.2d
        mov     synd, vend.d[0]
        /* We're not out of data, loop if we haven't found the character */
        cbz     synd, .Lloop

.Lend:
        /* Termination condition found, let's calculate the syndrome value */
        and     vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
        and     vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
        addp    vend.16b, vhas_chr1.16b, vhas_chr2.16b  /* 256->128 */
        addp    vend.16b, vend.16b, vend.16b            /* 128->64 */
        mov     synd, vend.d[0]
        /* Only do the clear for the last possible block */
        b.hi    .Ltail

.Lmasklast:
        /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
        add     tmp, cntrem, soff
        and     tmp, tmp, #31
        sub     tmp, tmp, #32
        neg     tmp, tmp, lsl #1
        lsl     synd, synd, tmp
        lsr     synd, synd, tmp

.Ltail:
        /* Count the trailing zeros using bit reversing */
        rbit    synd, synd
        /* Compensate the last post-increment */
        sub     src, src, #32
        /* Check that we have found a character */
        cmp     synd, #0
        /* And count the leading zeros */
        clz     synd, synd
        /* Compute the potential result */
        add     result, src, synd, lsr #1
        /* Select result or NULL */
        csel    result, xzr, result, eq
        ret

.Lzero_length:
        mov     result, xzr
        ret
END(memchr_default)
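The core-algorithm comment above is the heart of both the deleted routine and
its arm-optimized-routines replacement: pack one match event per byte into an
integer syndrome, then count trailing zeros to find the first event. A scalar
C sketch of that idea follows; the byte-at-a-time syndrome build is purely
illustrative and is not the NEON code path.

#include <stddef.h>
#include <stdint.h>

/* Scalar sketch of memchr's syndrome trick: bit 2*i of 'synd' plays the
   role of the "byte i matched" lane that the NEON code extracts with its
   0x40100401 repmask; counting trailing zeros recovers the byte index. */
static const void* memchr_sketch(const void* s, int c, size_t n) {
    const unsigned char* p = (const unsigned char*)s;
    while (n >= 8) {
        uint64_t synd = 0;
        for (int i = 0; i < 8; i++) {
            if (p[i] == (unsigned char)c) synd |= 1ull << (2 * i);
        }
        if (synd) return p + (__builtin_ctzll(synd) >> 1);
        p += 8;
        n -= 8;
    }
    for (; n != 0; p++, n--) {
        if (*p == (unsigned char)c) return p;
    }
    return NULL;
}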
@@ -1,153 +0,0 @@
/*
 * Copyright (c) 2014, ARM Limited
 * All rights Reserved.
 * Copyright (c) 2014, Linaro Ltd.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the company nor the names of its contributors
 *       may be used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include <private/bionic_asm.h>

/* Arguments and results. */
#define srcin   x0
#define chrin   w1

#define result  x0

#define src     x2
#define tmp1    x3
#define wtmp2   w4
#define tmp3    x5

#define vrepchr    v0
#define vdata1     v1
#define vdata2     v2
#define vhas_nul1  v3
#define vhas_nul2  v4
#define vhas_chr1  v5
#define vhas_chr2  v6
#define vrepmask_0 v7
#define vrepmask_c v16
#define vend1      v17
#define vend2      v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems). For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL). Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why. */

/* Locals and temporaries. */

ENTRY(strchr_default)
        /* Magic constant 0x40100401 to allow us to identify which lane
           matches the requested byte. Magic constant 0x80200802 used
           similarly for NUL termination. */
        mov     wtmp2, #0x0401
        movk    wtmp2, #0x4010, lsl #16
        dup     vrepchr.16b, chrin
        bic     src, srcin, #31         /* Work with aligned 32-byte hunks. */
        dup     vrepmask_c.4s, wtmp2
        ands    tmp1, srcin, #31
        add     vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s  /* equiv: lsl #1 */
        b.eq    .Lloop

        /* Input string is not 32-byte aligned. Rather than forcing
           the padding bytes to a safe value, we calculate the syndrome
           for all the bytes, but then mask off those bits of the
           syndrome that are related to the padding. */
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        neg     tmp1, tmp1
        cmeq    vhas_nul1.16b, vdata1.16b, #0
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_nul2.16b, vdata2.16b, #0
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        and     vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
        and     vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
        and     vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
        and     vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
        orr     vend1.16b, vhas_nul1.16b, vhas_chr1.16b
        orr     vend2.16b, vhas_nul2.16b, vhas_chr2.16b
        lsl     tmp1, tmp1, #1
        addp    vend1.16b, vend1.16b, vend2.16b  // 256->128
        mov     tmp3, #~0
        addp    vend1.16b, vend1.16b, vend2.16b  // 128->64
        lsr     tmp1, tmp3, tmp1

        mov     tmp3, vend1.d[0]
        bic     tmp1, tmp3, tmp1        // Mask padding bits.
        cbnz    tmp1, .Ltail

.Lloop:
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        cmeq    vhas_nul1.16b, vdata1.16b, #0
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_nul2.16b, vdata2.16b, #0
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        /* Use a fast check for the termination condition. */
        orr     vend1.16b, vhas_nul1.16b, vhas_chr1.16b
        orr     vend2.16b, vhas_nul2.16b, vhas_chr2.16b
        orr     vend1.16b, vend1.16b, vend2.16b
        addp    vend1.2d, vend1.2d, vend1.2d
        mov     tmp1, vend1.d[0]
        cbz     tmp1, .Lloop

        /* Termination condition found. Now need to establish exactly why
           we terminated. */
        and     vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
        and     vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
        and     vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
        and     vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
        orr     vend1.16b, vhas_nul1.16b, vhas_chr1.16b
        orr     vend2.16b, vhas_nul2.16b, vhas_chr2.16b
        addp    vend1.16b, vend1.16b, vend2.16b  // 256->128
        addp    vend1.16b, vend1.16b, vend2.16b  // 128->64

        mov     tmp1, vend1.d[0]
.Ltail:
        /* Count the trailing zeros, by bit reversing... */
        rbit    tmp1, tmp1
        /* Re-bias source. */
        sub     src, src, #32
        clz     tmp1, tmp1              /* And counting the leading zeros. */
        /* Tmp1 is even if the target character was found first. Otherwise
           we've found the end of string and we weren't looking for NUL. */
        tst     tmp1, #1
        add     result, src, tmp1, lsr #1
        csel    result, result, xzr, eq
        ret
END(strchr_default)
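For reference, the tuple decode the comment above describes, as a hedged C
sketch; it assumes the little-endian bit layout stated in the comment and a
hypothetical 'synd' already extracted from the vector registers.

#include <stdint.h>

/* Bit 2*i of synd = "byte i matched chrin", bit 2*i+1 = "byte i was NUL".
   The first set bit is the first event, so the parity of its position says
   *why* the scan stopped, exactly as the rbit/clz + tst #1 sequence in
   .Ltail does. */
static inline int strchr_decode(uint64_t synd, int* stopped_at_nul) {
    int tz = __builtin_ctzll(synd);  /* synd != 0 assumed */
    *stopped_at_nul = tz & 1;        /* odd bit -> NUL ended the scan */
    return tz >> 1;                  /* byte offset within the 32-byte hunk */
}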
@@ -1,192 +0,0 @@
/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

#define L(label) .L ## label

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result. */
#define src1    x0
#define src2    x1
#define result  x0

/* Internal variables. */
#define data1   x2
#define data1w  w2
#define data2   x3
#define data2w  w3
#define has_nul x4
#define diff    x5
#define syndrome x6
#define tmp1    x7
#define tmp2    x8
#define tmp3    x9
#define zeroones x10
#define pos     x11

        /* Start of performance-critical section -- one 64B cache line. */
ENTRY(strcmp_default)
.p2align 6
        eor     tmp1, src1, src2
        mov     zeroones, #REP8_01
        tst     tmp1, #7
        b.ne    L(misaligned8)
        ands    tmp1, src1, #7
        b.ne    L(mutual_align)
        /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
           (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
           can be done in parallel across the entire word. */
L(loop_aligned):
        ldr     data1, [src1], #8
        ldr     data2, [src2], #8
L(start_realigned):
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, #REP8_7f
        eor     diff, data1, data2      /* Non-zero if differences found. */
        bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator. */
        orr     syndrome, diff, has_nul
        cbz     syndrome, L(loop_aligned)
        /* End of performance-critical section -- one 64B cache line. */

L(end):
#ifndef __AARCH64EB__
        rev     syndrome, syndrome
        rev     data1, data1
        /* The MS-non-zero bit of the syndrome marks either the first bit
           that is different, or the top bit of the first zero byte.
           Shifting left now will bring the critical information into the
           top bits. */
        clz     pos, syndrome
        rev     data2, data2
        lsl     data1, data1, pos
        lsl     data2, data2, pos
        /* But we need to zero-extend (char is unsigned) the value and then
           perform a signed 32-bit subtraction. */
        lsr     data1, data1, #56
        sub     result, data1, data2, lsr #56
        ret
#else
        /* For big-endian we cannot use the trick with the syndrome value
           as carry-propagation can corrupt the upper bits if the trailing
           bytes in the string contain 0x01. */
        /* However, if there is no NUL byte in the dword, we can generate
           the result directly. We can't just subtract the bytes as the
           MSB might be significant. */
        cbnz    has_nul, 1f
        cmp     data1, data2
        cset    result, ne
        cneg    result, result, lo
        ret
1:
        /* Re-compute the NUL-byte detection, using a byte-reversed value. */
        rev     tmp3, data1
        sub     tmp1, tmp3, zeroones
        orr     tmp2, tmp3, #REP8_7f
        bic     has_nul, tmp1, tmp2
        rev     has_nul, has_nul
        orr     syndrome, diff, has_nul
        clz     pos, syndrome
        /* The MS-non-zero bit of the syndrome marks either the first bit
           that is different, or the top bit of the first zero byte.
           Shifting left now will bring the critical information into the
           top bits. */
        lsl     data1, data1, pos
        lsl     data2, data2, pos
        /* But we need to zero-extend (char is unsigned) the value and then
           perform a signed 32-bit subtraction. */
        lsr     data1, data1, #56
        sub     result, data1, data2, lsr #56
        ret
#endif

L(mutual_align):
        /* Sources are mutually aligned, but are not currently at an
           alignment boundary. Round down the addresses and then mask off
           the bytes that precede the start point. */
        bic     src1, src1, #7
        bic     src2, src2, #7
        lsl     tmp1, tmp1, #3          /* Bytes beyond alignment -> bits. */
        ldr     data1, [src1], #8
        neg     tmp1, tmp1              /* Bits to alignment -64. */
        ldr     data2, [src2], #8
        mov     tmp2, #~0
#ifdef __AARCH64EB__
        /* Big-endian. Early bytes are at MSB. */
        lsl     tmp2, tmp2, tmp1        /* Shift (tmp1 & 63). */
#else
        /* Little-endian. Early bytes are at LSB. */
        lsr     tmp2, tmp2, tmp1        /* Shift (tmp1 & 63). */
#endif
        orr     data1, data1, tmp2
        orr     data2, data2, tmp2
        b       L(start_realigned)

L(misaligned8):
        /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
           checking to make sure that we don't access beyond page boundary in
           SRC2. */
        tst     src1, #7
        b.eq    L(loop_misaligned)
L(do_misaligned):
        ldrb    data1w, [src1], #1
        ldrb    data2w, [src2], #1
        cmp     data1w, #1
        ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000. */
        b.ne    L(done)
        tst     src1, #7
        b.ne    L(do_misaligned)

L(loop_misaligned):
        /* Test if we are within the last dword of the end of a 4K page. If
           yes then jump back to the misaligned loop to copy a byte at a time. */
        and     tmp1, src2, #0xff8
        eor     tmp1, tmp1, #0xff8
        cbz     tmp1, L(do_misaligned)
        ldr     data1, [src1], #8
        ldr     data2, [src2], #8

        sub     tmp1, data1, zeroones
        orr     tmp2, data1, #REP8_7f
        eor     diff, data1, data2      /* Non-zero if differences found. */
        bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator. */
        orr     syndrome, diff, has_nul
        cbz     syndrome, L(loop_misaligned)
        b       L(end)

L(done):
        sub     result, data1, data2
        ret
END(strcmp_default)
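The NUL-detection identity quoted in the comment is worth seeing on its own.
A minimal C rendering, with REP8_* mirroring the file's #defines:

#include <stdint.h>

#define REP8_01 0x0101010101010101ull
#define REP8_7f 0x7f7f7f7f7f7f7f7full

/* (X - 1) & ~(X | 0x7f..7f) is non-zero iff some byte of X is zero. On a
   little-endian value, __builtin_ctzll(result) >> 3 is the index of the
   first zero byte. The big-endian caveat from the file applies: a borrow
   out of a zero byte can disturb *higher* bytes, which is harmless when
   you only need the lowest set bit. */
static inline uint64_t has_nul_byte(uint64_t x) {
    return (x - REP8_01) & ~(x | REP8_7f);
}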
@@ -1,227 +0,0 @@
/* Copyright (c) 2013-2015, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
 */

#include <private/bionic_asm.h>

/* To test the page crossing code path more thoroughly, compile with
   -DTEST_PAGE_CROSS - this will force all calls through the slower
   entry path. This option is not intended for production use. */

/* Arguments and results. */
#define srcin   x0
#define len     x0

/* Locals and temporaries. */
#define src      x1
#define data1    x2
#define data2    x3
#define has_nul1 x4
#define has_nul2 x5
#define tmp1     x4
#define tmp2     x5
#define tmp3     x6
#define tmp4     x7
#define zeroones x8

#define L(l) .L ## l

        /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
           (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
           can be done in parallel across the entire word. A faster check
           (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
           false hits for characters 129..255. */

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

#ifdef TEST_PAGE_CROSS
# define MIN_PAGE_SIZE 15
#else
# define MIN_PAGE_SIZE 4096
#endif

        /* Since strings are short on average, we check the first 16 bytes
           of the string for a NUL character. In order to do an unaligned ldp
           safely we have to do a page cross check first. If there is a NUL
           byte we calculate the length from the 2 8-byte words using
           conditional select to reduce branch mispredictions (it is unlikely
           strlen will be repeatedly called on strings with the same length).

           If the string is longer than 16 bytes, we align src so we don't
           need further page cross checks, and process 32 bytes per iteration
           using the fast NUL check. If we encounter non-ASCII characters,
           fall back to a second loop using the full NUL check.

           If the page cross check fails, we read 16 bytes from an aligned
           address, remove any characters before the string, and continue
           in the main loop using aligned loads. Since strings crossing a
           page in the first 16 bytes are rare (probability of
           16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.

           AArch64 systems have a minimum page size of 4k. We don't bother
           checking for larger page sizes - the cost of setting up the correct
           page size is just not worth the extra gain from a small reduction in
           the cases taking the slow path. Note that we only care about
           whether the first fetch, which may be misaligned, crosses a page
           boundary. */

ENTRY(strlen_default)
        and     tmp1, srcin, MIN_PAGE_SIZE - 1
        mov     zeroones, REP8_01
        cmp     tmp1, MIN_PAGE_SIZE - 16
        b.gt    L(page_cross)
        ldp     data1, data2, [srcin]
#ifdef __AARCH64EB__
        /* For big-endian, carry propagation (if the final byte in the
           string is 0x01) means we cannot use has_nul1/2 directly.
           Since we expect strings to be small and early-exit,
           byte-swap the data now so has_nul1/2 will be correct. */
        rev     data1, data1
        rev     data2, data2
#endif
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, REP8_7f
        sub     tmp3, data2, zeroones
        orr     tmp4, data2, REP8_7f
        bics    has_nul1, tmp1, tmp2
        bic     has_nul2, tmp3, tmp4
        ccmp    has_nul2, 0, 0, eq
        beq     L(main_loop_entry)

        /* Enter with C = has_nul1 == 0. */
        csel    has_nul1, has_nul1, has_nul2, cc
        mov     len, 8
        rev     has_nul1, has_nul1
        clz     tmp1, has_nul1
        csel    len, xzr, len, cc
        add     len, len, tmp1, lsr 3
        ret

        /* The inner loop processes 32 bytes per iteration and uses the fast
           NUL check. If we encounter non-ASCII characters, use a second
           loop with the accurate NUL check. */
        .p2align 4
L(main_loop_entry):
        bic     src, srcin, 15
        sub     src, src, 16
L(main_loop):
        ldp     data1, data2, [src, 32]!
L(page_cross_entry):
        sub     tmp1, data1, zeroones
        sub     tmp3, data2, zeroones
        orr     tmp2, tmp1, tmp3
        tst     tmp2, zeroones, lsl 7
        bne     1f
        ldp     data1, data2, [src, 16]
        sub     tmp1, data1, zeroones
        sub     tmp3, data2, zeroones
        orr     tmp2, tmp1, tmp3
        tst     tmp2, zeroones, lsl 7
        beq     L(main_loop)
        add     src, src, 16
1:
        /* The fast check failed, so do the slower, accurate NUL check. */
        orr     tmp2, data1, REP8_7f
        orr     tmp4, data2, REP8_7f
        bics    has_nul1, tmp1, tmp2
        bic     has_nul2, tmp3, tmp4
        ccmp    has_nul2, 0, 0, eq
        beq     L(nonascii_loop)

        /* Enter with C = has_nul1 == 0. */
L(tail):
#ifdef __AARCH64EB__
        /* For big-endian, carry propagation (if the final byte in the
           string is 0x01) means we cannot use has_nul1/2 directly. The
           easiest way to get the correct byte is to byte-swap the data
           and calculate the syndrome a second time. */
        csel    data1, data1, data2, cc
        rev     data1, data1
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, REP8_7f
        bic     has_nul1, tmp1, tmp2
#else
        csel    has_nul1, has_nul1, has_nul2, cc
#endif
        sub     len, src, srcin
        rev     has_nul1, has_nul1
        add     tmp2, len, 8
        clz     tmp1, has_nul1
        csel    len, len, tmp2, cc
        add     len, len, tmp1, lsr 3
        ret

L(nonascii_loop):
        ldp     data1, data2, [src, 16]!
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, REP8_7f
        sub     tmp3, data2, zeroones
        orr     tmp4, data2, REP8_7f
        bics    has_nul1, tmp1, tmp2
        bic     has_nul2, tmp3, tmp4
        ccmp    has_nul2, 0, 0, eq
        bne     L(tail)
        ldp     data1, data2, [src, 16]!
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, REP8_7f
        sub     tmp3, data2, zeroones
        orr     tmp4, data2, REP8_7f
        bics    has_nul1, tmp1, tmp2
        bic     has_nul2, tmp3, tmp4
        ccmp    has_nul2, 0, 0, eq
        beq     L(nonascii_loop)
        b       L(tail)

        /* Load 16 bytes from [srcin & ~15] and force the bytes that precede
           srcin to 0x7f, so we ignore any NUL bytes before the string.
           Then continue in the aligned loop. */
L(page_cross):
        bic     src, srcin, 15
        ldp     data1, data2, [src]
        lsl     tmp1, srcin, 3
        mov     tmp4, -1
#ifdef __AARCH64EB__
        /* Big-endian. Early bytes are at MSB. */
        lsr     tmp1, tmp4, tmp1        /* Shift (tmp1 & 63). */
#else
        /* Little-endian. Early bytes are at LSB. */
        lsl     tmp1, tmp4, tmp1        /* Shift (tmp1 & 63). */
#endif
        orr     tmp1, tmp1, REP8_80
        orn     data1, data1, tmp1
        orn     tmp2, data2, tmp1
        tst     srcin, 8
        csel    data1, data1, tmp4, eq
        csel    data2, data2, tmp2, eq
        b       L(page_cross_entry)

END(strlen_default)
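The entry-path guard the strategy comment describes ("to do an unaligned ldp
safely we have to do a page cross check first") reduces to one AND and one
compare. The same test in C, as a sketch:

#include <stdint.h>

/* A 16-byte unaligned load starting at 'p' stays within one page iff the
   offset within a MIN_PAGE_SIZE page is at most MIN_PAGE_SIZE - 16; this
   mirrors the and/cmp/b.gt sequence at the top of strlen_default. */
#define MIN_PAGE_SIZE 4096
static inline int crosses_page(uintptr_t p) {
    return (p & (MIN_PAGE_SIZE - 1)) > MIN_PAGE_SIZE - 16;
}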
@@ -1,280 +0,0 @@
/* Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result. */
#define src1    x0
#define src2    x1
#define limit   x2
#define result  x0

/* Internal variables. */
#define data1   x3
#define data1w  w3
#define data2   x4
#define data2w  w4
#define has_nul x5
#define diff    x6
#define syndrome x7
#define tmp1    x8
#define tmp2    x9
#define tmp3    x10
#define zeroones x11
#define pos     x12
#define limit_wd x13
#define mask    x14
#define endloop x15
#define count   mask

        .text
        .p2align 6
        .rep 7
        nop     /* Pad so that the loop below fits a cache line. */
        .endr
ENTRY(strncmp_default)
        cbz     limit, .Lret0
        eor     tmp1, src1, src2
        mov     zeroones, #REP8_01
        tst     tmp1, #7
        and     count, src1, #7
        b.ne    .Lmisaligned8
        cbnz    count, .Lmutual_align
        /* Calculate the number of full and partial words -1. */
        sub     limit_wd, limit, #1     /* limit != 0, so no underflow. */
        lsr     limit_wd, limit_wd, #3  /* Convert to Dwords. */

        /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
           (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
           can be done in parallel across the entire word. */
        /* Start of performance-critical section -- one 64B cache line. */
.Lloop_aligned:
        ldr     data1, [src1], #8
        ldr     data2, [src2], #8
.Lstart_realigned:
        subs    limit_wd, limit_wd, #1
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, #REP8_7f
        eor     diff, data1, data2      /* Non-zero if differences found. */
        csinv   endloop, diff, xzr, pl  /* Last Dword or differences. */
        bics    has_nul, tmp1, tmp2     /* Non-zero if NUL terminator. */
        ccmp    endloop, #0, #0, eq
        b.eq    .Lloop_aligned
        /* End of performance-critical section -- one 64B cache line. */

        /* Not reached the limit, must have found the end or a diff. */
        tbz     limit_wd, #63, .Lnot_limit

        /* Limit % 8 == 0 => all bytes significant. */
        ands    limit, limit, #7
        b.eq    .Lnot_limit

        lsl     limit, limit, #3        /* Bytes -> bits. */
        mov     mask, #~0
#ifdef __AARCH64EB__
        lsr     mask, mask, limit
#else
        lsl     mask, mask, limit
#endif
        bic     data1, data1, mask
        bic     data2, data2, mask

        /* Make sure that the NUL byte is marked in the syndrome. */
        orr     has_nul, has_nul, mask

.Lnot_limit:
        orr     syndrome, diff, has_nul

#ifndef __AARCH64EB__
        rev     syndrome, syndrome
        rev     data1, data1
        /* The MS-non-zero bit of the syndrome marks either the first bit
           that is different, or the top bit of the first zero byte.
           Shifting left now will bring the critical information into the
           top bits. */
        clz     pos, syndrome
        rev     data2, data2
        lsl     data1, data1, pos
        lsl     data2, data2, pos
        /* But we need to zero-extend (char is unsigned) the value and then
           perform a signed 32-bit subtraction. */
        lsr     data1, data1, #56
        sub     result, data1, data2, lsr #56
        ret
#else
        /* For big-endian we cannot use the trick with the syndrome value
           as carry-propagation can corrupt the upper bits if the trailing
           bytes in the string contain 0x01. */
        /* However, if there is no NUL byte in the dword, we can generate
           the result directly. We can't just subtract the bytes as the
           MSB might be significant. */
        cbnz    has_nul, 1f
        cmp     data1, data2
        cset    result, ne
        cneg    result, result, lo
        ret
1:
        /* Re-compute the NUL-byte detection, using a byte-reversed value. */
        rev     tmp3, data1
        sub     tmp1, tmp3, zeroones
        orr     tmp2, tmp3, #REP8_7f
        bic     has_nul, tmp1, tmp2
        rev     has_nul, has_nul
        orr     syndrome, diff, has_nul
        clz     pos, syndrome
        /* The MS-non-zero bit of the syndrome marks either the first bit
           that is different, or the top bit of the first zero byte.
           Shifting left now will bring the critical information into the
           top bits. */
        lsl     data1, data1, pos
        lsl     data2, data2, pos
        /* But we need to zero-extend (char is unsigned) the value and then
           perform a signed 32-bit subtraction. */
        lsr     data1, data1, #56
        sub     result, data1, data2, lsr #56
        ret
#endif

.Lmutual_align:
        /* Sources are mutually aligned, but are not currently at an
           alignment boundary. Round down the addresses and then mask off
           the bytes that precede the start point.
           We also need to adjust the limit calculations, but without
           overflowing if the limit is near ULONG_MAX. */
        bic     src1, src1, #7
        bic     src2, src2, #7
        ldr     data1, [src1], #8
        neg     tmp3, count, lsl #3     /* 64 - bits(bytes beyond align). */
        ldr     data2, [src2], #8
        mov     tmp2, #~0
        sub     limit_wd, limit, #1     /* limit != 0, so no underflow. */
#ifdef __AARCH64EB__
        /* Big-endian. Early bytes are at MSB. */
        lsl     tmp2, tmp2, tmp3        /* Shift (count & 63). */
#else
        /* Little-endian. Early bytes are at LSB. */
        lsr     tmp2, tmp2, tmp3        /* Shift (count & 63). */
#endif
        and     tmp3, limit_wd, #7
        lsr     limit_wd, limit_wd, #3
        /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
        add     limit, limit, count
        add     tmp3, tmp3, count
        orr     data1, data1, tmp2
        orr     data2, data2, tmp2
        add     limit_wd, limit_wd, tmp3, lsr #3
        b       .Lstart_realigned

        .p2align 6
        /* Don't bother with dwords for up to 16 bytes. */
.Lmisaligned8:
        cmp     limit, #16
        b.hs    .Ltry_misaligned_words

.Lbyte_loop:
        /* Perhaps we can do better than this. */
        ldrb    data1w, [src1], #1
        ldrb    data2w, [src2], #1
        subs    limit, limit, #1
        ccmp    data1w, #1, #0, hi      /* NZCV = 0b0000. */
        ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000. */
        b.eq    .Lbyte_loop
.Ldone:
        sub     result, data1, data2
        ret
        /* Align the SRC1 to a dword by doing a bytewise compare and then do
           the dword loop. */
.Ltry_misaligned_words:
        lsr     limit_wd, limit, #3
        cbz     count, .Ldo_misaligned

        neg     count, count
        and     count, count, #7
        sub     limit, limit, count
        lsr     limit_wd, limit, #3

.Lpage_end_loop:
        ldrb    data1w, [src1], #1
        ldrb    data2w, [src2], #1
        cmp     data1w, #1
        ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000. */
        b.ne    .Ldone
        subs    count, count, #1
        b.hi    .Lpage_end_loop

.Ldo_misaligned:
        /* Prepare ourselves for the next page crossing. Unlike the aligned
           loop, we fetch 1 less dword because we risk crossing bounds on
           SRC2. */
        mov     count, #8
        subs    limit_wd, limit_wd, #1
        b.lo    .Ldone_loop
.Lloop_misaligned:
        and     tmp2, src2, #0xff8
        eor     tmp2, tmp2, #0xff8
        cbz     tmp2, .Lpage_end_loop

        ldr     data1, [src1], #8
        ldr     data2, [src2], #8
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, #REP8_7f
        eor     diff, data1, data2      /* Non-zero if differences found. */
        bics    has_nul, tmp1, tmp2     /* Non-zero if NUL terminator. */
        ccmp    diff, #0, #0, eq
        b.ne    .Lnot_limit
        subs    limit_wd, limit_wd, #1
        b.pl    .Lloop_misaligned

.Ldone_loop:
        /* We found a difference or a NULL before the limit was reached. */
        and     limit, limit, #7
        cbz     limit, .Lnot_limit
        /* Read the last word. */
        sub     src1, src1, 8
        sub     src2, src2, 8
        ldr     data1, [src1, limit]
        ldr     data2, [src2, limit]
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, #REP8_7f
        eor     diff, data1, data2      /* Non-zero if differences found. */
        bics    has_nul, tmp1, tmp2     /* Non-zero if NUL terminator. */
        ccmp    diff, #0, #0, eq
        b.ne    .Lnot_limit

.Lret0:
        mov     result, #0
        ret
END(strncmp_default)
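When the limit ends inside the final dword, the mask block after
.Lloop_aligned trims both operands and forces a terminator into the syndrome.
Sketched in C for the little-endian case:

#include <stdint.h>

/* rem = limit % 8 significant bytes remain in the last dword (LE).
   Clearing bytes at or above 'rem' from both words and OR-ing the same
   mask into has_nul makes the limit behave exactly like a NUL terminator. */
static inline void strncmp_mask_tail(uint64_t* data1, uint64_t* data2,
                                     uint64_t* has_nul, unsigned rem) {
    uint64_t mask = ~0ull << (rem * 8);  /* bytes beyond the limit */
    *data1 &= ~mask;                     /* bic data1, data1, mask */
    *data2 &= ~mask;                     /* bic data2, data2, mask */
    *has_nul |= mask;                    /* orr has_nul, has_nul, mask */
}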
@@ -1,174 +0,0 @@
/* Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/* Arguments and results. */
#define srcin   x0
#define len     x0
#define limit   x1

/* Locals and temporaries. */
#define src     x2
#define data1   x3
#define data2   x4
#define data2a  x5
#define has_nul1 x6
#define has_nul2 x7
#define tmp1    x8
#define tmp2    x9
#define tmp3    x10
#define tmp4    x11
#define zeroones x12
#define pos     x13
#define limit_wd x14

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

        .text
        .p2align 6
.Lstart:
        /* Pre-pad to ensure the critical loop begins on an icache line. */
        .rep 7
        nop
        .endr
        /* Put this code here to avoid wasting more space with pre-padding. */
.Lhit_limit:
        mov     len, limit
        ret

ENTRY(strnlen_default)
        cbz     limit, .Lhit_limit
        mov     zeroones, #REP8_01
        bic     src, srcin, #15
        ands    tmp1, srcin, #15
        b.ne    .Lmisaligned
        /* Calculate the number of full and partial words -1. */
        sub     limit_wd, limit, #1     /* Limit != 0, so no underflow. */
        lsr     limit_wd, limit_wd, #4  /* Convert to Qwords. */

        /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
           (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
           can be done in parallel across the entire word. */
        /* The inner loop deals with two Dwords at a time. This has a
           slightly higher start-up cost, but we should win quite quickly,
           especially on cores with a high number of issue slots per
           cycle, as we get much better parallelism out of the operations. */

        /* Start of critical section -- keep to one 64Byte cache line. */
.Lloop:
        ldp     data1, data2, [src], #16
.Lrealigned:
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, #REP8_7f
        sub     tmp3, data2, zeroones
        orr     tmp4, data2, #REP8_7f
        bic     has_nul1, tmp1, tmp2
        bic     has_nul2, tmp3, tmp4
        subs    limit_wd, limit_wd, #1
        orr     tmp1, has_nul1, has_nul2
        ccmp    tmp1, #0, #0, pl        /* NZCV = 0000 */
        b.eq    .Lloop
        /* End of critical section -- keep to one 64Byte cache line. */

        orr     tmp1, has_nul1, has_nul2
        cbz     tmp1, .Lhit_limit       /* No null in final Qword. */

        /* We know there's a null in the final Qword. The easiest thing
           to do now is work out the length of the string and return
           MIN (len, limit). */

        sub     len, src, srcin
        cbz     has_nul1, .Lnul_in_data2
#ifdef __AARCH64EB__
        mov     data2, data1
#endif
        sub     len, len, #8
        mov     has_nul2, has_nul1
.Lnul_in_data2:
#ifdef __AARCH64EB__
        /* For big-endian, carry propagation (if the final byte in the
           string is 0x01) means we cannot use has_nul directly. The
           easiest way to get the correct byte is to byte-swap the data
           and calculate the syndrome a second time. */
        rev     data2, data2
        sub     tmp1, data2, zeroones
        orr     tmp2, data2, #REP8_7f
        bic     has_nul2, tmp1, tmp2
#endif
        sub     len, len, #8
        rev     has_nul2, has_nul2
        clz     pos, has_nul2
        add     len, len, pos, lsr #3   /* Bits to bytes. */
        cmp     len, limit
        csel    len, len, limit, ls     /* Return the lower value. */
        ret

.Lmisaligned:
        /* Deal with a partial first word.
           We're doing two things in parallel here;
           1) Calculate the number of words (but avoiding overflow if
              limit is near ULONG_MAX) - to do this we need to work out
              limit + tmp1 - 1 as a 65-bit value before shifting it;
           2) Load and mask the initial data words - we force the bytes
              before the ones we are interested in to 0xff - this ensures
              early bytes will not hit any zero detection. */
        sub     limit_wd, limit, #1
        neg     tmp4, tmp1
        cmp     tmp1, #8

        and     tmp3, limit_wd, #15
        lsr     limit_wd, limit_wd, #4
        mov     tmp2, #~0

        ldp     data1, data2, [src], #16
        lsl     tmp4, tmp4, #3          /* Bytes beyond alignment -> bits. */
        add     tmp3, tmp3, tmp1

#ifdef __AARCH64EB__
        /* Big-endian. Early bytes are at MSB. */
        lsl     tmp2, tmp2, tmp4        /* Shift (tmp1 & 63). */
#else
        /* Little-endian. Early bytes are at LSB. */
        lsr     tmp2, tmp2, tmp4        /* Shift (tmp1 & 63). */
#endif
        add     limit_wd, limit_wd, tmp3, lsr #4

        orr     data1, data1, tmp2
        orr     data2a, data2, tmp2

        csinv   data1, data1, xzr, le
        csel    data2, data2, data2a, le
        b       .Lrealigned
END(strnlen_default)
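Point 1 of the .Lmisaligned comment (computing limit + tmp1 - 1 as a 65-bit
value) deserves a plainer statement. A C sketch of the overflow-safe Qword
count, under the same preconditions (limit != 0, misalign = srcin & 15):

#include <stdint.h>

/* Shift the two halves separately so limit - 1 + misalign never has to
   exist as a single, potentially 65-bit, value; this mirrors the
   sub/and/lsr/add/add sequence in .Lmisaligned. */
static inline uint64_t qwords_minus_one(uint64_t limit, uint64_t misalign) {
    uint64_t hi = (limit - 1) >> 4;
    uint64_t lo = ((limit - 1) & 15) + misalign;  /* at most 15 + 15 */
    return hi + (lo >> 4);
}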
@@ -47,16 +47,34 @@ DEFINE_IFUNC_FOR(memchr) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(memchr_func, memchr_mte);
  } else {
    RETURN_FUNC(memchr_func, memchr_default);
    RETURN_FUNC(memchr_func, __memchr_aarch64);
  }
}

typedef int stpcpy_func(char*, const char*);
DEFINE_IFUNC_FOR(stpcpy) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(stpcpy_func, stpcpy_mte);
  } else {
    RETURN_FUNC(stpcpy_func, __stpcpy_aarch64);
  }
}

typedef char* strchr_func(const char*, int);
DEFINE_IFUNC_FOR(strchr) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(strchr_func, strchr_mte);
    RETURN_FUNC(strchr_func, __strchr_aarch64_mte);
  } else {
    RETURN_FUNC(strchr_func, strchr_default);
    RETURN_FUNC(strchr_func, __strchr_aarch64);
  }
}

typedef char* strchrnul_func(const char*, int);
DEFINE_IFUNC_FOR(strchrnul) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(strchrnul_func, strchrnul_mte);
  } else {
    RETURN_FUNC(strchrnul_func, __strchrnul_aarch64);
  }
}

@@ -65,16 +83,25 @@ DEFINE_IFUNC_FOR(strcmp) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(strcmp_func, strcmp_mte);
  } else {
    RETURN_FUNC(strcmp_func, strcmp_default);
    RETURN_FUNC(strcmp_func, __strcmp_aarch64);
  }
}

typedef int strcpy_func(char*, const char*);
DEFINE_IFUNC_FOR(strcpy) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(strcpy_func, strcpy_mte);
  } else {
    RETURN_FUNC(strcpy_func, __strcpy_aarch64);
  }
}

typedef size_t strlen_func(const char*);
DEFINE_IFUNC_FOR(strlen) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(strlen_func, strlen_mte);
    RETURN_FUNC(strlen_func, __strlen_aarch64_mte);
  } else {
    RETURN_FUNC(strlen_func, strlen_default);
    RETURN_FUNC(strlen_func, __strlen_aarch64);
  }
}

@@ -83,7 +110,7 @@ DEFINE_IFUNC_FOR(strncmp) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(strncmp_func, strncmp_mte);
  } else {
    RETURN_FUNC(strncmp_func, strncmp_default);
    RETURN_FUNC(strncmp_func, __strncmp_aarch64);
  }
}

@@ -92,7 +119,16 @@ DEFINE_IFUNC_FOR(strnlen) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(strnlen_func, strnlen_mte);
  } else {
    RETURN_FUNC(strnlen_func, strnlen_default);
    RETURN_FUNC(strnlen_func, __strnlen_aarch64);
  }
}

typedef char* strrchr_func(const char*, int);
DEFINE_IFUNC_FOR(strrchr) {
  if (supports_mte(arg->_hwcap2)) {
    RETURN_FUNC(strrchr_func, strrchr_mte);
  } else {
    RETURN_FUNC(strrchr_func, __strrchr_aarch64);
  }
}
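These resolvers run before the first call: DEFINE_IFUNC_FOR expands to an
ifunc resolver that inspects the HWCAP2 bits the kernel reports and binds
each symbol once. A standalone sketch of the same selection using getauxval
rather than bionic's resolver plumbing; strlen_impl and init_strlen are
illustrative names, and the HWCAP2_MTE fallback value is assumed from the
Linux uapi headers.

#include <stddef.h>
#include <sys/auxv.h>

#ifndef HWCAP2_MTE
#define HWCAP2_MTE (1UL << 18)  /* assumption: Linux uapi value for FEAT_MTE */
#endif

size_t __strlen_aarch64(const char*);      /* from arm-optimized-routines */
size_t __strlen_aarch64_mte(const char*);  /* MTE-safe variant */

static size_t (*strlen_impl)(const char*);

static void init_strlen(void) {
    /* supports_mte(arg->_hwcap2) in the dispatcher boils down to this bit test. */
    strlen_impl = (getauxval(AT_HWCAP2) & HWCAP2_MTE) ? __strlen_aarch64_mte
                                                      : __strlen_aarch64;
}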
@@ -1,167 +0,0 @@
/*
 * Copyright (c) 2017 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include <private/bionic_asm.h>

#define L(l) .L ## l

/* Parameters and result. */
#define src1    x0
#define src2    x1
#define limit   x2
#define result  w0

/* Internal variables. */
#define data1   x3
#define data1w  w3
#define data1h  x4
#define data2   x5
#define data2w  w5
#define data2h  x6
#define tmp1    x7
#define tmp2    x8

/* Small inputs of less than 8 bytes are handled separately. This allows the
   main code to be sped up using unaligned loads since there are now at least
   8 bytes to be compared. If the first 8 bytes are equal, align src1.
   This ensures each iteration does at most one unaligned access even if both
   src1 and src2 are unaligned, and mutually aligned inputs behave as if
   aligned. After the main loop, process the last 16 bytes using unaligned
   accesses. */

ENTRY(memcmp)
.p2align 6
        subs    limit, limit, 8
        b.lo    L(less8)

        /* Limit >= 8, so check first 8 bytes using unaligned loads. */
        ldr     data1, [src1], 8
        ldr     data2, [src2], 8
        cmp     data1, data2
        b.ne    L(return)

        subs    limit, limit, 8
        b.gt    L(more16)

        ldr     data1, [src1, limit]
        ldr     data2, [src2, limit]
        b       L(return)

L(more16):
        ldr     data1, [src1], 8
        ldr     data2, [src2], 8
        cmp     data1, data2
        bne     L(return)

        /* Jump directly to comparing the last 16 bytes for 32 byte (or less)
           strings. */
        subs    limit, limit, 16
        b.ls    L(last_bytes)

        /* We overlap loads between 0-32 bytes at either side of SRC1 when we
           try to align, so limit it only to strings larger than 128 bytes. */
        cmp     limit, 96
        b.ls    L(loop16)

        /* Align src1 and adjust src2 with bytes not yet done. */
        and     tmp1, src1, 15
        add     limit, limit, tmp1
        sub     src1, src1, tmp1
        sub     src2, src2, tmp1

        /* Loop performing 16 bytes per iteration using aligned src1.
           Limit is pre-decremented by 16 and must be larger than zero.
           Exit if <= 16 bytes left to do or if the data is not equal. */
        .p2align 4
L(loop16):
        ldp     data1, data1h, [src1], 16
        ldp     data2, data2h, [src2], 16
        subs    limit, limit, 16
        ccmp    data1, data2, 0, hi
        ccmp    data1h, data2h, 0, eq
        b.eq    L(loop16)

        cmp     data1, data2
        bne     L(return)
        mov     data1, data1h
        mov     data2, data2h
        cmp     data1, data2
        bne     L(return)

        /* Compare last 1-16 bytes using unaligned access. */
L(last_bytes):
        add     src1, src1, limit
        add     src2, src2, limit
        ldp     data1, data1h, [src1]
        ldp     data2, data2h, [src2]
        cmp     data1, data2
        bne     L(return)
        mov     data1, data1h
        mov     data2, data2h
        cmp     data1, data2

        /* Compare data bytes and set return value to 0, -1 or 1. */
L(return):
#ifndef __AARCH64EB__
        rev     data1, data1
        rev     data2, data2
#endif
        cmp     data1, data2
L(ret_eq):
        cset    result, ne
        cneg    result, result, lo
        ret

        .p2align 4
        /* Compare up to 8 bytes. Limit is [-8..-1]. */
L(less8):
        adds    limit, limit, 4
        b.lo    L(less4)
        ldr     data1w, [src1], 4
        ldr     data2w, [src2], 4
        cmp     data1w, data2w
        b.ne    L(return)
        sub     limit, limit, 4
L(less4):
        adds    limit, limit, 4
        beq     L(ret_eq)
L(byte_loop):
        ldrb    data1w, [src1], 1
        ldrb    data2w, [src2], 1
        subs    limit, limit, 1
        ccmp    data1w, data2w, 0, ne   /* NZCV = 0b0000. */
        b.eq    L(byte_loop)
        sub     result, data1w, data2w
        ret

END(memcmp)
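The last_bytes path is the trick that keeps this memcmp branch-light: any
1-16 remaining bytes are re-read as one 16-byte pair at the very end of the
buffers, overlapping data already known equal. A C sketch, little-endian
assumed; the bswap stands in for the rev before the final compare.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* For n >= 16, the last 1-16 bytes are covered by one pair of unaligned
   16-byte loads at (end - 16), so no remainder loop is needed. */
static int tail16(const unsigned char* s1, const unsigned char* s2, size_t n) {
    uint64_t a[2], b[2];
    memcpy(a, s1 + n - 16, 16);  /* overlaps already-compared bytes */
    memcpy(b, s2 + n - 16, 16);
    for (int i = 0; i < 2; i++) {
        if (a[i] != b[i]) {
            /* byte-reverse so an integer compare is lexicographic */
            uint64_t x = __builtin_bswap64(a[i]);
            uint64_t y = __builtin_bswap64(b[i]);
            return x < y ? -1 : 1;
        }
    }
    return 0;
}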
@@ -1,5 +1,5 @@
/*
 * Copyright (C) 2019 The Android Open Source Project
 * Copyright (C) 2020 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -26,7 +26,5 @@
 * SUCH DAMAGE.
 */

#include <upstream-openbsd/android/include/openbsd-compat.h>

#define strlen strlen_mte
#include <upstream-openbsd/lib/libc/string/strlen.c>
#define strchrnul strchrnul_mte
#include <bionic/strchrnul.cpp>
@@ -97,9 +97,9 @@
#define REP8_80 0x8080808080808080

#if defined(STPCPY)
ENTRY(stpcpy)
ENTRY(stpcpy_mte)
#elif defined(STRCPY)
ENTRY(strcpy)
ENTRY(strcpy_mte)
#endif
        mov     zeroones, #REP8_01
#if defined(STRCPY)
@@ -1,5 +1,5 @@
/*
 * Copyright (C) 2019 The Android Open Source Project
 * Copyright (C) 2020 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -26,5 +26,5 @@
 * SUCH DAMAGE.
 */

#define strchr strchr_mte
#include <bionic/strchr.cpp>
#define strrchr strrchr_mte
#include <bionic/strrchr.cpp>
@@ -34,8 +34,12 @@ ENTRY(name); \
END(name)

FUNCTION_DELEGATE(memchr, memchr_mte)
FUNCTION_DELEGATE(strchr, strchr_mte)
FUNCTION_DELEGATE(stpcpy, stpcpy_mte)
FUNCTION_DELEGATE(strchr, __strchr_aarch64_mte)
FUNCTION_DELEGATE(strchrnul, strchrnul_mte)
FUNCTION_DELEGATE(strcmp, strcmp_mte)
FUNCTION_DELEGATE(strlen, strlen_mte)
FUNCTION_DELEGATE(strcpy, strcpy_mte)
FUNCTION_DELEGATE(strlen, __strlen_aarch64_mte)
FUNCTION_DELEGATE(strrchr, strrchr_mte)
FUNCTION_DELEGATE(strncmp, strncmp_mte)
FUNCTION_DELEGATE(strnlen, strnlen_mte)