android_bionic/libc/arch-x86_64/string/sse2-strcpy-slm.S

1922 lines
36 KiB
ArmAsm

/*
Copyright (c) 2014, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Stand-alone build (USE_AS_STRCAT not defined): supply defaults for the
   entry-point name and for the helper macros, unless the including file
   has already defined them.  */
#ifndef USE_AS_STRCAT
# ifndef STRCPY
# define STRCPY strcpy
# endif
/* L(label): turn a label into an assembler-local symbol (.L prefix).  */
# ifndef L
# define L(label) .L##label
# endif
/* Wrappers for the CFI directives that bracket the function.  */
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
/* ENTRY(name): declare NAME as a global function symbol, align it to
   16 bytes, define the label and open the CFI region.  */
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
/* END(name): close the CFI region and record the size of NAME.  */
# ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
#endif
/* JMPTBL(I, B): one jump-table entry, the offset of label I from the
   table base B.  */
#define JMPTBL(I, B) I - B
/* BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE): load the sign-extended
   32-bit offset stored at TABLE + INDEX * SCALE, add the address of TABLE
   and jump to the result.  Clobbers %r11 and %rcx.  */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), %rcx; \
lea (%r11, %rcx), %rcx; \
jmp *%rcx
/* STRCPY entry point.  The prologue is only emitted when USE_AS_STRCAT is
   not defined; with USE_AS_STRCAT the code below is entered with its
   registers already set up (presumably by the including file -- that code
   is not visible here).
   In:  %rdi = destination, %rsi = source and, with USE_AS_STRNCPY,
        %rdx = maximum byte count (kept in %r8 from here on).
   Out: %rax = destination, unless USE_AS_STPCPY is defined, in which case
        the exit paths compute %rax themselves.  */
#ifndef USE_AS_STRCAT
# define RETURN ret
.text
ENTRY (STRCPY)
# ifdef USE_AS_STRNCPY
mov %rdx, %r8
test %r8, %r8
jz L(ExitZero)
# endif
mov %rsi, %rcx
# ifndef USE_AS_STPCPY
mov %rdi, %rax /* save result */
# endif
#endif
/* %rcx = offset of the source inside its 64-byte block.  When it is at
   most 32, the 32 bytes starting at the source lie inside that block and
   are probed directly with unaligned loads.  */
and $63, %rcx
cmp $32, %rcx
jbe L(SourceStringAlignmentLess32)
/* Otherwise round the source down to 16 bytes, keep the misalignment in
   %rcx, compare the aligned chunk against zero and shift the byte mask
   right by %cl to drop the bytes that precede the real source.  */
and $-16, %rsi
and $15, %rcx
pxor %xmm0, %xmm0
pxor %xmm1, %xmm1
pcmpeqb (%rsi), %xmm1
pmovmskb %xmm1, %rdx
shr %cl, %rdx
#ifdef USE_AS_STRNCPY
/* %r10 = 16 - %rcx (17 - %rcx for plain strncpy); a count that does not
   exceed it is handled by the bounded tail code.  */
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
mov $16, %r10
sub %rcx, %r10
cmp %r10, %r8
# else
mov $17, %r10
sub %rcx, %r10
cmp %r10, %r8
# endif
jbe L(CopyFrom1To16BytesTailCase2OrCase3)
#endif
/* Zero byte inside the first chunk: finish through the exit table.  */
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail)
/* Probe the second aligned chunk.  %xmm0 stays all-zero when no byte
   matches, so it can keep serving as the zero reference.  */
pcmpeqb 16(%rsi), %xmm0
pmovmskb %xmm0, %rdx
#ifdef USE_AS_STRNCPY
add $16, %r10
cmp %r10, %r8
jbe L(CopyFrom1To32BytesCase2OrCase3)
#endif
test %rdx, %rdx
jnz L(CopyFrom1To32Bytes)
/* No terminator yet: copy the first 16 source bytes and fall through into
   the unrolled copy.  */
movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
movdqu %xmm1, (%rdi)
/* Main unrolled copy: aligned 16-byte loads from the source, unaligned
   16-byte stores to the destination, so the two pointers may have
   different alignment.
   On entry: %rsi = source rounded down to 16 bytes, %rcx = source & 15,
   %xmm0 = 0, the first 16 bytes are already stored at (%rdi) and, with
   USE_AS_STRNCPY, %r8 = count measured from the real source.
   %rdi is biased by -%rcx (and %r8 by +%rcx) so that (%rdi, X) pairs with
   (%rsi, X).  Every step loads the next chunk, stores the previous one,
   tests the new chunk for a zero byte and, for the bounded variants,
   checks whether the count runs out inside it.  */
.p2align 4
L(Unalign16Both):
sub %rcx, %rdi
#ifdef USE_AS_STRNCPY
add %rcx, %r8
/* If the addition wrapped (for example strncat with n == SIZE_MAX), clamp
   the count to all ones so that a huge n is not mistaken for a tiny one:
   sbb yields -1 on carry and 0 otherwise.  %rcx is reloaded just below.  */
sbb %rcx, %rcx
or %rcx, %r8
#endif
mov $16, %rcx
movdqa (%rsi, %rcx), %xmm1
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rdx
add $16, %rcx
#ifdef USE_AS_STRNCPY
/* 48 = the three 16-byte chunks at offsets 0, 16 and 32 from %rsi.  */
sub $48, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm2)
#else
jnz L(CopyFrom1To16Bytes)
#endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %rdx
add $16, %rcx
#ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm3)
#else
jnz L(CopyFrom1To16Bytes)
#endif
movaps 16(%rsi, %rcx), %xmm4
movdqu %xmm3, (%rdi, %rcx)
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %rdx
add $16, %rcx
#ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm4)
#else
jnz L(CopyFrom1To16Bytes)
#endif
movaps 16(%rsi, %rcx), %xmm1
movdqu %xmm4, (%rdi, %rcx)
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %rdx
add $16, %rcx
#ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm1)
#else
jnz L(CopyFrom1To16Bytes)
#endif
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %rdx
add $16, %rcx
#ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm2)
#else
jnz L(CopyFrom1To16Bytes)
#endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %rdx
add $16, %rcx
#ifdef USE_AS_STRNCPY
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm3)
#else
jnz L(CopyFrom1To16Bytes)
#endif
/* Store the last unrolled chunk, then round the source down to a 64-byte
   boundary for the main loop.  %rdx = old %rsi - new %rsi; the same shift
   is applied to %rdi, and the count is rebased on the new position.  */
movdqu %xmm3, (%rdi, %rcx)
mov %rsi, %rdx
lea 16(%rsi, %rcx), %rsi
and $-0x40, %rsi
sub %rsi, %rdx
sub %rdx, %rdi
#ifdef USE_AS_STRNCPY
lea 128(%r8, %rdx), %r8
#endif
/* 64-byte main loop.  Four aligned chunks are loaded into %xmm4..%xmm7
   (with working copies in %xmm2/%xmm3).  The byte-wise unsigned minimum of
   all four contains a zero byte exactly when one of the 64 source bytes is
   zero, so a single compare against %xmm0 (all zero) tests the whole
   block.  With USE_AS_STRNCPY, %r8 is decremented by 64 per block and the
   bounded leave path is taken once it reaches zero or wraps.  */
L(Unaligned64Loop):
movaps (%rsi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rsi), %xmm5
movaps 32(%rsi), %xmm3
movaps %xmm3, %xmm6
movaps 48(%rsi), %xmm7
pminub %xmm5, %xmm2
pminub %xmm7, %xmm3
pminub %xmm2, %xmm3
pcmpeqb %xmm0, %xmm3
pmovmskb %xmm3, %rdx
#ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(UnalignedLeaveCase2OrCase3)
#endif
test %rdx, %rdx
jnz L(Unaligned64Leave)
/* Steady state: advance both pointers, store the previous block from
   %xmm4..%xmm7 (unaligned stores) interleaved with the loads of the next
   block, then test the new block the same way.  */
L(Unaligned64Loop_start):
add $64, %rdi
add $64, %rsi
movdqu %xmm4, -64(%rdi)
movaps (%rsi), %xmm2
movdqa %xmm2, %xmm4
movdqu %xmm5, -48(%rdi)
movaps 16(%rsi), %xmm5
pminub %xmm5, %xmm2
movaps 32(%rsi), %xmm3
movdqu %xmm6, -32(%rdi)
movaps %xmm3, %xmm6
movdqu %xmm7, -16(%rdi)
movaps 48(%rsi), %xmm7
pminub %xmm7, %xmm3
pminub %xmm2, %xmm3
pcmpeqb %xmm0, %xmm3
pmovmskb %xmm3, %rdx
#ifdef USE_AS_STRNCPY
sub $64, %r8
jbe L(UnalignedLeaveCase2OrCase3)
#endif
test %rdx, %rdx
jz L(Unaligned64Loop_start)
/* A zero byte is somewhere in the current 64-byte block (%xmm4..%xmm7,
   not yet stored).  Test the chunks in order; the first one holding the
   terminator selects the exit.  The code falling through at the end
   handles the fourth chunk (%xmm7).  */
L(Unaligned64Leave):
pxor %xmm1, %xmm1
pcmpeqb %xmm4, %xmm0
pcmpeqb %xmm5, %xmm1
pmovmskb %xmm0, %rdx
pmovmskb %xmm1, %rcx
test %rdx, %rdx
jnz L(CopyFrom1To16BytesUnaligned_0)
test %rcx, %rcx
jnz L(CopyFrom1To16BytesUnaligned_16)
/* %xmm0 and %xmm1 are all-zero again here because neither compare above
   matched.  */
pcmpeqb %xmm6, %xmm0
pcmpeqb %xmm7, %xmm1
pmovmskb %xmm0, %rdx
pmovmskb %xmm1, %rcx
test %rdx, %rdx
jnz L(CopyFrom1To16BytesUnaligned_32)
/* Terminator in the fourth chunk: %rdx = its index inside that chunk.  */
bsf %rcx, %rdx
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
movdqu %xmm6, 32(%rdi)
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
/* Bounded copy: store the whole chunk, leave %rdi just past the
   terminator, adjust %r8 accordingly and zero-fill the rest.  */
# ifdef USE_AS_STPCPY
lea 48(%rdi, %rdx), %rax
# endif
movdqu %xmm7, 48(%rdi)
add $15, %r8
sub %rdx, %r8
lea 49(%rdi, %rdx), %rdi
jmp L(StrncpyFillTailWithZero)
#else
/* Otherwise copy exactly %rdx + 1 bytes of the fourth chunk through the
   exit table.  */
add $48, %rsi
add $48, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif
/* Reached when (source & 63) <= 32: the first 32 source bytes are read
   with two unaligned loads and tested for a terminator.  On entry %rcx
   still holds source & 63.  */
L(SourceStringAlignmentLess32):
pxor %xmm0, %xmm0
movdqu (%rsi), %xmm1
movdqu 16(%rsi), %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %rdx
#ifdef USE_AS_STRNCPY
/* Counts up to 16 (17 for plain strncpy) finish in the bounded tail.  */
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $16, %r8
# else
cmp $17, %r8
# endif
jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
#endif
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail1)
/* No terminator in the first 16 bytes: store them and test the next 16.  */
pcmpeqb %xmm2, %xmm0
movdqu %xmm1, (%rdi)
pmovmskb %xmm0, %rdx
#ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $32, %r8
# else
cmp $33, %r8
# endif
jbe L(CopyFrom1To32Bytes1Case2OrCase3)
#endif
test %rdx, %rdx
jnz L(CopyFrom1To32Bytes1)
/* Set up the state expected by L(Unalign16Both): %rcx = source & 15 and
   %rsi rounded down to 16 bytes.  */
and $15, %rcx
and $-16, %rsi
jmp L(Unalign16Both)
/*------End of main part with loops---------------------*/
/* Case1 */
/* Case 1: terminator found, no count limit involved.  Only assembled for
   the unbounded variants and for the strcat family.
   In: %rcx = offset of the chunk from %rsi/%rdi, %rdx = zero-byte mask of
   that chunk.  Copies the bytes up to and including the terminator
   through the exit table.  */
#if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
.p2align 4
L(CopyFrom1To16Bytes):
add %rcx, %rdi
add %rcx, %rsi
bsf %rdx, %rdx
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif
/* Terminator inside the first aligned chunk of a misaligned source.
   %rsi was rounded down, so adding the misalignment %rcx restores the
   real source; %rdx (already shifted by %cl) gives the terminator index
   from there.  */
.p2align 4
L(CopyFrom1To16BytesTail):
add %rcx, %rsi
bsf %rdx, %rdx
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
/* Exits of L(SourceStringAlignmentLess32).
   L(CopyFrom1To32Bytes1): the terminator is in the second 16 bytes and the
   first 16 are already stored, so step both pointers (and, for the
   zero-filling variants, the count) past them.
   L(CopyFrom1To16BytesTail1): the terminator is in the first 16 bytes.  */
.p2align 4
L(CopyFrom1To32Bytes1):
add $16, %rsi
add $16, %rdi
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $16, %r8
#endif
L(CopyFrom1To16BytesTail1):
bsf %rdx, %rdx
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
/* Terminator inside the second aligned chunk of a misaligned source and
   nothing stored yet.  Restores the real source pointer and converts the
   index inside the second chunk into an index from the real source:
   index + 16 - misalignment.  */
.p2align 4
L(CopyFrom1To32Bytes):
bsf %rdx, %rdx
add %rcx, %rsi
add $16, %rdx
sub %rcx, %rdx
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
/* Leave path of the 64-byte loop: terminator in the first chunk (%xmm4).
   %rdx = zero-byte mask on entry, terminator index after bsf.  */
.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
bsf %rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
/* Bounded copy: store the whole chunk, point %rdi just past the
   terminator, adjust %r8 and zero-fill the remainder.  */
# ifdef USE_AS_STPCPY
lea (%rdi, %rdx), %rax
# endif
movdqu %xmm4, (%rdi)
add $63, %r8
sub %rdx, %r8
lea 1(%rdi, %rdx), %rdi
jmp L(StrncpyFillTailWithZero)
#else
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif
/* Leave path of the 64-byte loop: terminator in the second chunk (%xmm5).
   %rcx = zero-byte mask of that chunk; the first chunk is stored whole.  */
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %rcx, %rdx
movdqu %xmm4, (%rdi)
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
/* Bounded copy: store the second chunk whole, then zero-fill after the
   terminator.  */
# ifdef USE_AS_STPCPY
lea 16(%rdi, %rdx), %rax
# endif
movdqu %xmm5, 16(%rdi)
add $47, %r8
sub %rdx, %r8
lea 17(%rdi, %rdx), %rdi
jmp L(StrncpyFillTailWithZero)
#else
add $16, %rsi
add $16, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif
/* Leave path of the 64-byte loop: terminator in the third chunk (%xmm6).
   %rdx = zero-byte mask of that chunk; the first two chunks are stored
   whole.  */
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %rdx, %rdx
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
/* Bounded copy: store the third chunk whole, then zero-fill after the
   terminator.  */
# ifdef USE_AS_STPCPY
lea 32(%rdi, %rdx), %rax
# endif
movdqu %xmm6, 32(%rdi)
add $31, %r8
sub %rdx, %r8
lea 33(%rdi, %rdx), %rdi
jmp L(StrncpyFillTailWithZero)
#else
add $32, %rsi
add $32, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif
/* Bounded-copy helpers (strncpy/stpncpy only, not the strcat family).
   Each stores the chunk that holds the terminator, still live in the named
   register, at (%rdi, %rcx) and joins the common exit, which locates the
   terminator and zero-fills what is left of the count.  */
#ifdef USE_AS_STRNCPY
# ifndef USE_AS_STRCAT
.p2align 4
L(CopyFrom1To16BytesUnalignedXmm6):
movdqu %xmm6, (%rdi, %rcx)
jmp L(CopyFrom1To16BytesXmmExit)
.p2align 4
L(CopyFrom1To16BytesUnalignedXmm5):
movdqu %xmm5, (%rdi, %rcx)
jmp L(CopyFrom1To16BytesXmmExit)
.p2align 4
L(CopyFrom1To16BytesUnalignedXmm4):
movdqu %xmm4, (%rdi, %rcx)
jmp L(CopyFrom1To16BytesXmmExit)
.p2align 4
L(CopyFrom1To16BytesUnalignedXmm3):
movdqu %xmm3, (%rdi, %rcx)
jmp L(CopyFrom1To16BytesXmmExit)
.p2align 4
L(CopyFrom1To16BytesUnalignedXmm1):
movdqu %xmm1, (%rdi, %rcx)
jmp L(CopyFrom1To16BytesXmmExit)
# endif
/* Shared dispatch for the bounded paths when the terminator comes before
   the count runs out: %rdx = terminator index, %rsi/%rdi already point at
   the bytes to copy.  */
.p2align 4
L(CopyFrom1To16BytesExit):
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
/* Case2 */
/* Case 2: the chunk under test holds a terminator and the count also runs
   out around it.  After bsf, %rdx = terminator index and %r8 = bytes still
   allowed from the same base.  If the terminator comes first (%rdx < %r8)
   the regular exit table is used; otherwise exactly %r8 bytes are copied
   through the strncpy exit table.  The four entry points differ only in
   how %rsi, %rdi, %r8 and %rdx are rebased first.  */
.p2align 4
L(CopyFrom1To16BytesCase2):
/* From the unrolled copy and the loop leave code: undo the final
   subtraction of 16 from %r8 and step to the chunk at offset %rcx.  */
add $16, %r8
add %rcx, %rdi
add %rcx, %rsi
bsf %rdx, %rdx
cmp %r8, %rdx
jb L(CopyFrom1To16BytesExit)
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
.p2align 4
L(CopyFrom1To32BytesCase2):
/* Terminator in the second aligned chunk of a misaligned source:
   index from the real source = index + 16 - misalignment.  */
add %rcx, %rsi
bsf %rdx, %rdx
add $16, %rdx
sub %rcx, %rdx
cmp %r8, %rdx
jb L(CopyFrom1To16BytesExit)
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
L(CopyFrom1To16BytesTailCase2):
/* Terminator in the first aligned chunk of a misaligned source.  */
add %rcx, %rsi
bsf %rdx, %rdx
cmp %r8, %rdx
jb L(CopyFrom1To16BytesExit)
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
L(CopyFrom1To16BytesTail1Case2):
/* Pointers already address the bytes to copy.  */
bsf %rdx, %rdx
cmp %r8, %rdx
jb L(CopyFrom1To16BytesExit)
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
/* Case2 or Case3, Case3 */
/* The count runs out within the chunk under test.  A non-zero mask in
   %rdx means the chunk also holds a terminator (case 2, handled by the
   matching Case2 label).  Otherwise (case 3) exactly %r8 bytes are copied
   through the strncpy exit table.  */
.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
test %rdx, %rdx
jnz L(CopyFrom1To16BytesCase2)
L(CopyFrom1To16BytesCase3):
/* Undo the last subtraction of 16 and step to the chunk at offset %rcx.  */
add $16, %r8
add %rcx, %rdi
add %rcx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
test %rdx, %rdx
jnz L(CopyFrom1To32BytesCase2)
/* Restore the real (misaligned) source pointer.  */
add %rcx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTailCase2)
add %rcx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
/* The first 16 bytes are already stored: skip them in both pointers and
   in the count, then share the code below.  */
add $16, %rdi
add $16, %rsi
sub $16, %r8
L(CopyFrom1To16BytesTail1Case2OrCase3):
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail1Case2)
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
#endif
/*------------End labels regarding with copying 1-16 bytes--and 1-32 bytes----*/
/* L(ExitN), N = 1..32: targets of L(ExitTable).  Entry N copies N bytes
   from (%rsi) to (%rdi); the jump index N - 1 is the position of the
   terminator, so byte N - 1 is the zero byte.  Where the code stores %dh
   as that byte, it relies on %rdx holding the table index (below 32), so
   %dh is zero.
   With USE_AS_STPCPY, %rax is set to the address of the terminator.
   For strncpy/stpncpy (not the strcat family) N is subtracted from %r8
   and %rdi advanced; lea leaves the flags of the sub intact, so jnz
   zero-fills only when bytes of the count remain.  */
.p2align 4
L(Exit1):
/* Empty string: store just the terminator.  */
mov %dh, (%rdi)
#ifdef USE_AS_STPCPY
lea (%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $1, %r8
lea 1(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit2):
/* One 16-bit move.  */
mov (%rsi), %dx
mov %dx, (%rdi)
#ifdef USE_AS_STPCPY
lea 1(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $2, %r8
lea 2(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit3):
/* 16-bit move plus the zero byte from %dh.  */
mov (%rsi), %cx
mov %cx, (%rdi)
mov %dh, 2(%rdi)
#ifdef USE_AS_STPCPY
lea 2(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $3, %r8
lea 3(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit4):
/* One 32-bit move.  */
mov (%rsi), %edx
mov %edx, (%rdi)
#ifdef USE_AS_STPCPY
lea 3(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $4, %r8
lea 4(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit5):
/* 32-bit move plus the zero byte from %dh.  */
mov (%rsi), %ecx
mov %dh, 4(%rdi)
mov %ecx, (%rdi)
#ifdef USE_AS_STPCPY
lea 4(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $5, %r8
lea 5(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit6):
/* 32-bit move at 0 and 16-bit move at 4.  */
mov (%rsi), %ecx
mov 4(%rsi), %dx
mov %ecx, (%rdi)
mov %dx, 4(%rdi)
#ifdef USE_AS_STPCPY
lea 5(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $6, %r8
lea 6(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit7):
/* Two overlapping 32-bit moves at offsets 0 and 3.  */
mov (%rsi), %ecx
mov 3(%rsi), %edx
mov %ecx, (%rdi)
mov %edx, 3(%rdi)
#ifdef USE_AS_STPCPY
lea 6(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $7, %r8
lea 7(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit8):
/* One 64-bit move.  */
mov (%rsi), %rdx
mov %rdx, (%rdi)
#ifdef USE_AS_STPCPY
lea 7(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $8, %r8
lea 8(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
/* L(Exit9)..L(Exit16): copy 9..16 bytes ending with the terminator.  The
   epilogue follows the same pattern in every entry: optional %rax for
   USE_AS_STPCPY, then for strncpy/stpncpy the count update and a
   conditional jump to the zero-fill code.  */
.p2align 4
L(Exit9):
/* 64-bit move plus the zero byte from %dh (%rdx holds the table index, 8).  */
mov (%rsi), %rcx
mov %dh, 8(%rdi)
mov %rcx, (%rdi)
#ifdef USE_AS_STPCPY
lea 8(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $9, %r8
lea 9(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit10):
/* 64-bit move at 0 and 16-bit move at 8.  */
mov (%rsi), %rcx
mov 8(%rsi), %dx
mov %rcx, (%rdi)
mov %dx, 8(%rdi)
#ifdef USE_AS_STPCPY
lea 9(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $10, %r8
lea 10(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit11):
/* 64-bit move at 0 and overlapping 32-bit move at 7.  */
mov (%rsi), %rcx
mov 7(%rsi), %edx
mov %rcx, (%rdi)
mov %edx, 7(%rdi)
#ifdef USE_AS_STPCPY
lea 10(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $11, %r8
lea 11(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit12):
/* 64-bit move at 0 and 32-bit move at 8.  */
mov (%rsi), %rcx
mov 8(%rsi), %edx
mov %rcx, (%rdi)
mov %edx, 8(%rdi)
#ifdef USE_AS_STPCPY
lea 11(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $12, %r8
lea 12(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit13):
/* Two overlapping 64-bit moves at offsets 0 and 5.  */
mov (%rsi), %rcx
mov 5(%rsi), %rdx
mov %rcx, (%rdi)
mov %rdx, 5(%rdi)
#ifdef USE_AS_STPCPY
lea 12(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $13, %r8
lea 13(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit14):
/* Two overlapping 64-bit moves at offsets 0 and 6.  */
mov (%rsi), %rcx
mov 6(%rsi), %rdx
mov %rcx, (%rdi)
mov %rdx, 6(%rdi)
#ifdef USE_AS_STPCPY
lea 13(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $14, %r8
lea 14(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit15):
/* Two overlapping 64-bit moves at offsets 0 and 7.  */
mov (%rsi), %rcx
mov 7(%rsi), %rdx
mov %rcx, (%rdi)
mov %rdx, 7(%rdi)
#ifdef USE_AS_STPCPY
lea 14(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $15, %r8
lea 15(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit16):
/* One unaligned 16-byte move.  */
movdqu (%rsi), %xmm0
movdqu %xmm0, (%rdi)
#ifdef USE_AS_STPCPY
lea 15(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $16, %r8
lea 16(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
/* L(Exit17)..L(Exit24): copy 17..24 bytes ending with the terminator: one
   unaligned 16-byte move for the head plus a scalar move for the tail.
   The epilogue follows the same pattern in every entry.  */
.p2align 4
L(Exit17):
/* 16-byte move plus the zero byte from %dh (%rdx holds the index, 16).  */
movdqu (%rsi), %xmm0
movdqu %xmm0, (%rdi)
mov %dh, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 16(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $17, %r8
lea 17(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit18):
/* 16-byte move and 16-bit move at 16.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %cx
movdqu %xmm0, (%rdi)
mov %cx, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 17(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $18, %r8
lea 18(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit19):
/* 16-byte move and overlapping 32-bit move at 15.  */
movdqu (%rsi), %xmm0
mov 15(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %ecx, 15(%rdi)
#ifdef USE_AS_STPCPY
lea 18(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $19, %r8
lea 19(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit20):
/* 16-byte move and 32-bit move at 16.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 19(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $20, %r8
lea 20(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit21):
/* 16-byte move, 32-bit move at 16 and the zero byte from %dh at 20.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
mov %dh, 20(%rdi)
#ifdef USE_AS_STPCPY
lea 20(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $21, %r8
lea 21(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit22):
/* 16-byte move and overlapping 64-bit move at 14.  */
movdqu (%rsi), %xmm0
mov 14(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 14(%rdi)
#ifdef USE_AS_STPCPY
lea 21(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $22, %r8
lea 22(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit23):
/* 16-byte move and overlapping 64-bit move at 15.  */
movdqu (%rsi), %xmm0
mov 15(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 15(%rdi)
#ifdef USE_AS_STPCPY
lea 22(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $23, %r8
lea 23(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit24):
/* 16-byte move and 64-bit move at 16.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 23(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $24, %r8
lea 24(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
/* L(Exit25)..L(Exit32): copy 25..32 bytes ending with the terminator: a
   16-byte head plus either scalar moves or a second, possibly overlapping,
   16-byte move.  The epilogue follows the same pattern in every entry.  */
.p2align 4
L(Exit25):
/* 16-byte move, 64-bit move at 16 and the zero byte from %dh at 24
   (%rdx holds the table index, 24, so %dh is zero).  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
mov %dh, 24(%rdi)
#ifdef USE_AS_STPCPY
lea 24(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $25, %r8
lea 25(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit26):
/* 16-byte move, 64-bit move at 16 and 16-bit move at 24.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %cx
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %cx, 24(%rdi)
#ifdef USE_AS_STPCPY
lea 25(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $26, %r8
lea 26(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit27):
/* 16-byte move, 64-bit move at 16 and overlapping 32-bit move at 23.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 23(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 23(%rdi)
#ifdef USE_AS_STPCPY
lea 26(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $27, %r8
lea 27(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit28):
/* 16-byte move, 64-bit move at 16 and 32-bit move at 24.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 24(%rdi)
#ifdef USE_AS_STPCPY
lea 27(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $28, %r8
lea 28(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit29):
/* Two overlapping 16-byte moves at offsets 0 and 13.  */
movdqu (%rsi), %xmm0
movdqu 13(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 13(%rdi)
#ifdef USE_AS_STPCPY
lea 28(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $29, %r8
lea 29(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit30):
/* Two overlapping 16-byte moves at offsets 0 and 14.  */
movdqu (%rsi), %xmm0
movdqu 14(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 14(%rdi)
#ifdef USE_AS_STPCPY
lea 29(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $30, %r8
lea 30(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit31):
/* Two overlapping 16-byte moves at offsets 0 and 15.  */
movdqu (%rsi), %xmm0
movdqu 15(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 15(%rdi)
#ifdef USE_AS_STPCPY
lea 30(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $31, %r8
lea 31(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
.p2align 4
L(Exit32):
/* Two 16-byte moves at offsets 0 and 16.  */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 31(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $32, %r8
lea 32(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
#endif
RETURN
/* L(StrncpyExitN), N = 0..33: targets of L(ExitStrncpyTable), used when
   the byte count is the limiting factor.  Entry N copies exactly N bytes
   from (%rsi) to (%rdi) and stores no terminator of its own.
   With USE_AS_STPCPY, %rax = %rdi + N.  With USE_AS_STRCAT a zero byte is
   appended at offset N (via %ch, cleared just before).  */
#ifdef USE_AS_STRNCPY
.p2align 4
L(StrncpyExit0):
/* Nothing to copy.  */
#ifdef USE_AS_STPCPY
mov %rdi, %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, (%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit1):
/* One byte.  */
mov (%rsi), %dl
mov %dl, (%rdi)
#ifdef USE_AS_STPCPY
lea 1(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 1(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit2):
/* One 16-bit move.  */
mov (%rsi), %dx
mov %dx, (%rdi)
#ifdef USE_AS_STPCPY
lea 2(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 2(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit3):
/* 16-bit move at 0 and one byte at 2.  */
mov (%rsi), %cx
mov 2(%rsi), %dl
mov %cx, (%rdi)
mov %dl, 2(%rdi)
#ifdef USE_AS_STPCPY
lea 3(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 3(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit4):
/* One 32-bit move.  */
mov (%rsi), %edx
mov %edx, (%rdi)
#ifdef USE_AS_STPCPY
lea 4(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 4(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit5):
/* 32-bit move at 0 and one byte at 4.  */
mov (%rsi), %ecx
mov 4(%rsi), %dl
mov %ecx, (%rdi)
mov %dl, 4(%rdi)
#ifdef USE_AS_STPCPY
lea 5(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 5(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit6):
/* 32-bit move at 0 and 16-bit move at 4.  */
mov (%rsi), %ecx
mov 4(%rsi), %dx
mov %ecx, (%rdi)
mov %dx, 4(%rdi)
#ifdef USE_AS_STPCPY
lea 6(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 6(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit7):
/* Two overlapping 32-bit moves at offsets 0 and 3.  */
mov (%rsi), %ecx
mov 3(%rsi), %edx
mov %ecx, (%rdi)
mov %edx, 3(%rdi)
#ifdef USE_AS_STPCPY
lea 7(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 7(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit8):
/* One 64-bit move.  */
mov (%rsi), %rdx
mov %rdx, (%rdi)
#ifdef USE_AS_STPCPY
lea 8(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 8(%rdi)
#endif
RETURN
/* L(StrncpyExit9)..L(StrncpyExit16): copy exactly 9..16 bytes, no
   terminator.  USE_AS_STPCPY sets %rax = %rdi + N; USE_AS_STRCAT appends
   a zero byte at offset N.  */
.p2align 4
L(StrncpyExit9):
/* 64-bit move at 0 and one byte at 8.  */
mov (%rsi), %rcx
mov 8(%rsi), %dl
mov %rcx, (%rdi)
mov %dl, 8(%rdi)
#ifdef USE_AS_STPCPY
lea 9(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 9(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit10):
/* 64-bit move at 0 and 16-bit move at 8.  */
mov (%rsi), %rcx
mov 8(%rsi), %dx
mov %rcx, (%rdi)
mov %dx, 8(%rdi)
#ifdef USE_AS_STPCPY
lea 10(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 10(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit11):
/* 64-bit move at 0 and overlapping 32-bit move at 7.  */
mov (%rsi), %rcx
mov 7(%rsi), %edx
mov %rcx, (%rdi)
mov %edx, 7(%rdi)
#ifdef USE_AS_STPCPY
lea 11(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 11(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit12):
/* 64-bit move at 0 and 32-bit move at 8.  */
mov (%rsi), %rcx
mov 8(%rsi), %edx
mov %rcx, (%rdi)
mov %edx, 8(%rdi)
#ifdef USE_AS_STPCPY
lea 12(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 12(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit13):
/* Two overlapping 64-bit moves at offsets 0 and 5.  */
mov (%rsi), %rcx
mov 5(%rsi), %rdx
mov %rcx, (%rdi)
mov %rdx, 5(%rdi)
#ifdef USE_AS_STPCPY
lea 13(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 13(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit14):
/* Two overlapping 64-bit moves at offsets 0 and 6.  */
mov (%rsi), %rcx
mov 6(%rsi), %rdx
mov %rcx, (%rdi)
mov %rdx, 6(%rdi)
#ifdef USE_AS_STPCPY
lea 14(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 14(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit15):
/* Two overlapping 64-bit moves at offsets 0 and 7.  */
mov (%rsi), %rcx
mov 7(%rsi), %rdx
mov %rcx, (%rdi)
mov %rdx, 7(%rdi)
#ifdef USE_AS_STPCPY
lea 15(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 15(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit16):
/* One unaligned 16-byte move.  */
movdqu (%rsi), %xmm0
movdqu %xmm0, (%rdi)
#ifdef USE_AS_STPCPY
lea 16(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 16(%rdi)
#endif
RETURN
/* L(StrncpyExit17)..L(StrncpyExit24): copy exactly 17..24 bytes, no
   terminator: a 16-byte head plus scalar moves for the tail.
   USE_AS_STPCPY sets %rax = %rdi + N; USE_AS_STRCAT appends a zero byte
   at offset N.  */
.p2align 4
L(StrncpyExit17):
/* 16-byte move and one byte at 16.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %cl
movdqu %xmm0, (%rdi)
mov %cl, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 17(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 17(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit18):
/* 16-byte move and 16-bit move at 16.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %cx
movdqu %xmm0, (%rdi)
mov %cx, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 18(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 18(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit19):
/* 16-byte move and overlapping 32-bit move at 15.  */
movdqu (%rsi), %xmm0
mov 15(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %ecx, 15(%rdi)
#ifdef USE_AS_STPCPY
lea 19(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 19(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit20):
/* 16-byte move and 32-bit move at 16.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 20(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 20(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit21):
/* 16-byte move, 32-bit move at 16 and one byte at 20.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
mov 20(%rsi), %dl
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
mov %dl, 20(%rdi)
#ifdef USE_AS_STPCPY
lea 21(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 21(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit22):
/* 16-byte move and overlapping 64-bit move at 14.  */
movdqu (%rsi), %xmm0
mov 14(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 14(%rdi)
#ifdef USE_AS_STPCPY
lea 22(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 22(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit23):
/* 16-byte move and overlapping 64-bit move at 15.  */
movdqu (%rsi), %xmm0
mov 15(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 15(%rdi)
#ifdef USE_AS_STPCPY
lea 23(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 23(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit24):
/* 16-byte move and 64-bit move at 16.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 24(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 24(%rdi)
#endif
RETURN
/* L(StrncpyExit25)..L(StrncpyExit33): copy exactly 25..33 bytes, no
   terminator.  USE_AS_STPCPY sets %rax = %rdi + N (entry 33 has no such
   code); USE_AS_STRCAT appends a zero byte at offset N.  */
.p2align 4
L(StrncpyExit25):
/* 16-byte move, 64-bit move at 16 and one byte at 24.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %cl
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %cl, 24(%rdi)
#ifdef USE_AS_STPCPY
lea 25(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 25(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit26):
/* 16-byte move, 64-bit move at 16 and 16-bit move at 24.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %cx
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %cx, 24(%rdi)
#ifdef USE_AS_STPCPY
lea 26(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 26(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit27):
/* 16-byte move, 64-bit move at 16 and overlapping 32-bit move at 23.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 23(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 23(%rdi)
#ifdef USE_AS_STPCPY
lea 27(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 27(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit28):
/* 16-byte move, 64-bit move at 16 and 32-bit move at 24.  */
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 24(%rdi)
#ifdef USE_AS_STPCPY
lea 28(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 28(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit29):
/* Two overlapping 16-byte moves at offsets 0 and 13.  */
movdqu (%rsi), %xmm0
movdqu 13(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 13(%rdi)
#ifdef USE_AS_STPCPY
lea 29(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 29(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit30):
/* Two overlapping 16-byte moves at offsets 0 and 14.  */
movdqu (%rsi), %xmm0
movdqu 14(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 14(%rdi)
#ifdef USE_AS_STPCPY
lea 30(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 30(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit31):
/* Two overlapping 16-byte moves at offsets 0 and 15.  */
movdqu (%rsi), %xmm0
movdqu 15(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 15(%rdi)
#ifdef USE_AS_STPCPY
lea 31(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 31(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit32):
/* Two 16-byte moves at offsets 0 and 16.  */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi)
#ifdef USE_AS_STPCPY
lea 32(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 32(%rdi)
#endif
RETURN
.p2align 4
L(StrncpyExit33):
/* Two 16-byte moves plus one byte at 32.
   NOTE(review): unlike the other entries this one has no USE_AS_STPCPY
   code; the count checks at the entry use 16/32 instead of 17/33 for
   the stpcpy and strcat variants, so index 33 looks unreachable there --
   confirm before relying on it.  */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm2
mov 32(%rsi), %cl
movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi)
mov %cl, 32(%rdi)
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 33(%rdi)
#endif
RETURN
/* L(FillN), N = 0..16: targets of L(FillTable) (strncpy/stpncpy only).
   Entry N stores the last N zero bytes at (%rdi); %rdx and %xmm0 are zero
   here (cleared at the top of L(StrncpyFillTailWithZero)).
   Entries 3, 7 and 15 use one wider store that starts one byte before
   %rdi.  NOTE(review): this relies on the byte at -1(%rdi) already being
   zero (the terminator or earlier padding) -- confirm against the
   callers.  */
#ifndef USE_AS_STRCAT
.p2align 4
L(Fill0):
RETURN
.p2align 4
L(Fill1):
mov %dl, (%rdi)
RETURN
.p2align 4
L(Fill2):
mov %dx, (%rdi)
RETURN
.p2align 4
L(Fill3):
/* 4-byte store covering offsets -1..2.  */
mov %edx, -1(%rdi)
RETURN
.p2align 4
L(Fill4):
mov %edx, (%rdi)
RETURN
.p2align 4
L(Fill5):
mov %edx, (%rdi)
mov %dl, 4(%rdi)
RETURN
.p2align 4
L(Fill6):
mov %edx, (%rdi)
mov %dx, 4(%rdi)
RETURN
.p2align 4
L(Fill7):
/* 8-byte store covering offsets -1..6.  */
mov %rdx, -1(%rdi)
RETURN
.p2align 4
L(Fill8):
mov %rdx, (%rdi)
RETURN
.p2align 4
L(Fill9):
mov %rdx, (%rdi)
mov %dl, 8(%rdi)
RETURN
.p2align 4
L(Fill10):
mov %rdx, (%rdi)
mov %dx, 8(%rdi)
RETURN
.p2align 4
L(Fill11):
/* Overlapping stores: 8 bytes at 0 and 4 bytes at 7.  */
mov %rdx, (%rdi)
mov %edx, 7(%rdi)
RETURN
.p2align 4
L(Fill12):
mov %rdx, (%rdi)
mov %edx, 8(%rdi)
RETURN
.p2align 4
L(Fill13):
/* Overlapping 8-byte stores at 0 and 5.  */
mov %rdx, (%rdi)
mov %rdx, 5(%rdi)
RETURN
.p2align 4
L(Fill14):
/* Overlapping 8-byte stores at 0 and 6.  */
mov %rdx, (%rdi)
mov %rdx, 6(%rdi)
RETURN
.p2align 4
L(Fill15):
/* 16-byte store covering offsets -1..14.  */
movdqu %xmm0, -1(%rdi)
RETURN
.p2align 4
L(Fill16):
movdqu %xmm0, (%rdi)
RETURN
/* L(CopyFrom1To16BytesUnalignedXmm2): store the chunk in %xmm2 at
   (%rdi, %rcx), then fall through (across the alignment padding) into the
   common exit.
   L(CopyFrom1To16BytesXmmExit): %rdx = zero-byte mask of the chunk just
   stored at (%rdi, %rcx).  Computes the terminator index, rebases %r8,
   leaves %rdi just past the terminator and falls through into the
   zero-fill code.  With USE_AS_STPCPY, %rax = address of the terminator.  */
.p2align 4
L(CopyFrom1To16BytesUnalignedXmm2):
movdqu %xmm2, (%rdi, %rcx)
.p2align 4
L(CopyFrom1To16BytesXmmExit):
bsf %rdx, %rdx
add $15, %r8
add %rcx, %rdi
#ifdef USE_AS_STPCPY
lea (%rdi, %rdx), %rax
#endif
sub %rdx, %r8
lea 1(%rdi, %rdx), %rdi
/* Zero-fill the rest of the destination (strncpy/stpncpy only).
   In: %rdi = first byte to clear, %r8 = number of bytes to clear.
   Clears %xmm0 and %rdx for the stores here and in the L(FillN) entries.
   Up to 16 bytes go straight to the fill table.  Otherwise one unaligned
   16-byte store is followed by aligned stores: %rdi is rounded down to
   16 bytes and %r8 grows by the same amount, since the aligned stores
   re-cover those bytes.  %r8 is kept biased below zero between the
   stages, which is why the signed jl tests are used.  */
.p2align 4
L(StrncpyFillTailWithZero):
pxor %xmm0, %xmm0
xor %rdx, %rdx
sub $16, %r8
jbe L(StrncpyFillExit)
movdqu %xmm0, (%rdi)
add $16, %rdi
mov %rdi, %rsi
and $0xf, %rsi
sub %rsi, %rdi
add %rsi, %r8
sub $64, %r8
jb L(StrncpyFillLess64)
/* 64 bytes per iteration with aligned stores.  */
L(StrncpyFillLoopMovdqa):
movdqa %xmm0, (%rdi)
movdqa %xmm0, 16(%rdi)
movdqa %xmm0, 32(%rdi)
movdqa %xmm0, 48(%rdi)
add $64, %rdi
sub $64, %r8
jae L(StrncpyFillLoopMovdqa)
/* Fewer than 64 bytes left: peel off 32 and 16 bytes where possible and
   finish the last 0..16 bytes through the fill table.  */
L(StrncpyFillLess64):
add $32, %r8
jl L(StrncpyFillLess32)
movdqa %xmm0, (%rdi)
movdqa %xmm0, 16(%rdi)
add $32, %rdi
sub $16, %r8
jl L(StrncpyFillExit)
movdqa %xmm0, (%rdi)
add $16, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
L(StrncpyFillLess32):
add $16, %r8
jl L(StrncpyFillExit)
movdqa %xmm0, (%rdi)
add $16, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
/* Undo the bias of 16 so that %r8 is the table index.  */
L(StrncpyFillExit):
add $16, %r8
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
/* end of ifndef USE_AS_STRCAT */
#endif
/* Bounded leave path of the 64-byte loop: the count runs out inside the
   current block (%xmm4..%xmm7, not yet stored).  A non-zero %rdx means the
   block also holds a terminator (case 2).
   Case 3 (no terminator): %rcx = (%r8 + 64) rounded down to 16, the offset
   of the chunk in which the count ends.  Whole chunks before it are
   stored here and the partial one is finished by
   L(CopyFrom1To16BytesCase3).  If the count covers the block exactly, all
   four chunks are stored and the function returns from here.  */
.p2align 4
L(UnalignedLeaveCase2OrCase3):
test %rdx, %rdx
jnz L(Unaligned64LeaveCase2)
L(Unaligned64LeaveCase3):
lea 64(%r8), %rcx
and $-16, %rcx
add $48, %r8
jl L(CopyFrom1To16BytesCase3)
movdqu %xmm4, (%rdi)
sub $16, %r8
jb L(CopyFrom1To16BytesCase3)
movdqu %xmm5, 16(%rdi)
sub $16, %r8
jb L(CopyFrom1To16BytesCase3)
movdqu %xmm6, 32(%rdi)
sub $16, %r8
jb L(CopyFrom1To16BytesCase3)
movdqu %xmm7, 48(%rdi)
#ifdef USE_AS_STPCPY
lea 64(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 64(%rdi)
#endif
RETURN
/* Case 2 of the bounded leave path: the current 64-byte block holds a
   terminator and the count also ends within it.  The chunks are tested in
   order with %rcx as the running chunk offset; for each one, either the
   count ends there (joint case 2/3 code), the terminator is there (exit
   that stores the chunk and zero-fills, or the plain copy exit for the
   strcat family), or the chunk is stored and the next one is tested.  */
.p2align 4
L(Unaligned64LeaveCase2):
xor %rcx, %rcx
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %rdx
add $48, %r8
jle L(CopyFrom1To16BytesCase2OrCase3)
test %rdx, %rdx
#ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm4)
#else
jnz L(CopyFrom1To16Bytes)
#endif
/* %xmm0 is all-zero again because the compare above found nothing.  */
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %rdx
movdqu %xmm4, (%rdi)
add $16, %rcx
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rdx, %rdx
#ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm5)
#else
jnz L(CopyFrom1To16Bytes)
#endif
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %rdx
movdqu %xmm5, 16(%rdi)
add $16, %rcx
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rdx, %rdx
#ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm6)
#else
jnz L(CopyFrom1To16Bytes)
#endif
/* Only the fourth chunk is left: store the third, step both pointers to
   the fourth (%rcx = 32 here) and pick the exit by whichever comes first,
   the terminator (%rdx) or the end of the count (%r8).  */
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %rdx
movdqu %xmm6, 32(%rdi)
lea 16(%rdi, %rcx), %rdi
lea 16(%rsi, %rcx), %rsi
bsf %rdx, %rdx
cmp %r8, %rdx
jb L(CopyFrom1To16BytesExit)
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
/* Bounded variants called with a count of zero: nothing is copied.
   Outside the strcat family the destination pointer is returned.  */
.p2align 4
L(ExitZero):
#ifndef USE_AS_STRCAT
mov %rdi, %rax
#endif
RETURN
#endif
/* Close the function opened by ENTRY; the strcat family opens it under
   the name STRCAT (presumably in the including file -- not visible
   here).  */
#ifndef USE_AS_STRCAT
END (STRCPY)
#else
END (STRCAT)
#endif
/* Jump table for the exits taken when a terminator was found.  Entry i
   (indexed by the position of the terminator) is the 32-bit offset of
   L(Exit<i + 1>) from the table base; see BRANCH_TO_JMPTBL_ENTRY.
   The alignment directive is placed after the section switch so that it
   aligns the table inside .rodata rather than padding the end of .text,
   matching the explicitly aligned L(FillTable).  */
.section .rodata
.p2align 4
L(ExitTable):
.int JMPTBL(L(Exit1), L(ExitTable))
.int JMPTBL(L(Exit2), L(ExitTable))
.int JMPTBL(L(Exit3), L(ExitTable))
.int JMPTBL(L(Exit4), L(ExitTable))
.int JMPTBL(L(Exit5), L(ExitTable))
.int JMPTBL(L(Exit6), L(ExitTable))
.int JMPTBL(L(Exit7), L(ExitTable))
.int JMPTBL(L(Exit8), L(ExitTable))
.int JMPTBL(L(Exit9), L(ExitTable))
.int JMPTBL(L(Exit10), L(ExitTable))
.int JMPTBL(L(Exit11), L(ExitTable))
.int JMPTBL(L(Exit12), L(ExitTable))
.int JMPTBL(L(Exit13), L(ExitTable))
.int JMPTBL(L(Exit14), L(ExitTable))
.int JMPTBL(L(Exit15), L(ExitTable))
.int JMPTBL(L(Exit16), L(ExitTable))
.int JMPTBL(L(Exit17), L(ExitTable))
.int JMPTBL(L(Exit18), L(ExitTable))
.int JMPTBL(L(Exit19), L(ExitTable))
.int JMPTBL(L(Exit20), L(ExitTable))
.int JMPTBL(L(Exit21), L(ExitTable))
.int JMPTBL(L(Exit22), L(ExitTable))
.int JMPTBL(L(Exit23), L(ExitTable))
.int JMPTBL(L(Exit24), L(ExitTable))
.int JMPTBL(L(Exit25), L(ExitTable))
.int JMPTBL(L(Exit26), L(ExitTable))
.int JMPTBL(L(Exit27), L(ExitTable))
.int JMPTBL(L(Exit28), L(ExitTable))
.int JMPTBL(L(Exit29), L(ExitTable))
.int JMPTBL(L(Exit30), L(ExitTable))
.int JMPTBL(L(Exit31), L(ExitTable))
.int JMPTBL(L(Exit32), L(ExitTable))
#ifdef USE_AS_STRNCPY
/* Jump table for the exits taken when the byte count runs out: entry N
   (N = 0..33, the number of bytes still to copy) holds the 32-bit offset
   of L(StrncpyExitN) from the table base.  Each entry is spelled out as
   the label difference that JMPTBL would produce.  */
L(ExitStrncpyTable):
.int L(StrncpyExit0) - L(ExitStrncpyTable)
.int L(StrncpyExit1) - L(ExitStrncpyTable)
.int L(StrncpyExit2) - L(ExitStrncpyTable)
.int L(StrncpyExit3) - L(ExitStrncpyTable)
.int L(StrncpyExit4) - L(ExitStrncpyTable)
.int L(StrncpyExit5) - L(ExitStrncpyTable)
.int L(StrncpyExit6) - L(ExitStrncpyTable)
.int L(StrncpyExit7) - L(ExitStrncpyTable)
.int L(StrncpyExit8) - L(ExitStrncpyTable)
.int L(StrncpyExit9) - L(ExitStrncpyTable)
.int L(StrncpyExit10) - L(ExitStrncpyTable)
.int L(StrncpyExit11) - L(ExitStrncpyTable)
.int L(StrncpyExit12) - L(ExitStrncpyTable)
.int L(StrncpyExit13) - L(ExitStrncpyTable)
.int L(StrncpyExit14) - L(ExitStrncpyTable)
.int L(StrncpyExit15) - L(ExitStrncpyTable)
.int L(StrncpyExit16) - L(ExitStrncpyTable)
.int L(StrncpyExit17) - L(ExitStrncpyTable)
.int L(StrncpyExit18) - L(ExitStrncpyTable)
.int L(StrncpyExit19) - L(ExitStrncpyTable)
.int L(StrncpyExit20) - L(ExitStrncpyTable)
.int L(StrncpyExit21) - L(ExitStrncpyTable)
.int L(StrncpyExit22) - L(ExitStrncpyTable)
.int L(StrncpyExit23) - L(ExitStrncpyTable)
.int L(StrncpyExit24) - L(ExitStrncpyTable)
.int L(StrncpyExit25) - L(ExitStrncpyTable)
.int L(StrncpyExit26) - L(ExitStrncpyTable)
.int L(StrncpyExit27) - L(ExitStrncpyTable)
.int L(StrncpyExit28) - L(ExitStrncpyTable)
.int L(StrncpyExit29) - L(ExitStrncpyTable)
.int L(StrncpyExit30) - L(ExitStrncpyTable)
.int L(StrncpyExit31) - L(ExitStrncpyTable)
.int L(StrncpyExit32) - L(ExitStrncpyTable)
.int L(StrncpyExit33) - L(ExitStrncpyTable)
# ifndef USE_AS_STRCAT
/* Jump table for the zero-fill tail (strncpy/stpncpy only): entry N
   (N = 0..16, the number of zero bytes still to store) holds the 32-bit
   offset of L(FillN) from the table base, aligned to 16 bytes.  Each entry
   is spelled out as the label difference that JMPTBL would produce.  */
.p2align 4
L(FillTable):
.int L(Fill0) - L(FillTable)
.int L(Fill1) - L(FillTable)
.int L(Fill2) - L(FillTable)
.int L(Fill3) - L(FillTable)
.int L(Fill4) - L(FillTable)
.int L(Fill5) - L(FillTable)
.int L(Fill6) - L(FillTable)
.int L(Fill7) - L(FillTable)
.int L(Fill8) - L(FillTable)
.int L(Fill9) - L(FillTable)
.int L(Fill10) - L(FillTable)
.int L(Fill11) - L(FillTable)
.int L(Fill12) - L(FillTable)
.int L(Fill13) - L(FillTable)
.int L(Fill14) - L(FillTable)
.int L(Fill15) - L(FillTable)
.int L(Fill16) - L(FillTable)
# endif
#endif