/* wcscmp for 32-bit x86: GNU as, AT&T syntax, run through the C preprocessor. */
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.

* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* L(label): spell a label with the ".L" prefix (a non-exported GAS label).
   Defined only if the including environment has not already provided it. */
#ifndef L
# define L(label)	.L##label
#endif

/* Fallback definitions of the CFI helper macros, used only when they are not
   already defined.  Each one expands to the GAS directive of the same name. */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): open a function.  Marks `name` as a global symbol of type
   @function, aligns it to 2^4 bytes, emits the label and starts its CFI
   region. */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close a function opened with ENTRY.  Ends the CFI region and
   records the symbol size as the distance from `name` to here. */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves 4 bytes further
   from %esp and REG is recorded as saved at offset 0 from the new %esp. */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for the matching pop: undo the CFA adjustment and mark
   REG as holding its original value again. */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl/popl paired with the unwind annotations above. */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* ENTRANCE saves %esi and %edi on the stack.
   RETURN restores them (reverse order) and returns.  The two CFI_PUSH
   annotations after `ret` emit no instructions: they put the unwind state
   back to "both registers pushed" for the code that textually follows a
   RETURN. */
#define ENTRANCE	PUSH(%esi); PUSH(%edi)
#define RETURN		POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);

/* Offsets from %esp, valid at function entry before ENTRANCE pushes
   anything: PARMS is the first stack argument, STR1/STR2 are the two string
   pointers. */
#define PARMS		4
#define STR1		PARMS
#define STR2		STR1+4

/*
 * wcscmp -- compare two null-terminated strings of 32-bit elements.
 * NOTE(review): the C prototype is presumably
 *   int wcscmp (const wchar_t *s1, const wchar_t *s2);  confirm in the header.
 *
 * In:    STR1(%esp) = first string, STR2(%esp) = second string.
 * Out:   %eax = 0 if the strings match up to and including the terminating
 *        null element; otherwise 1 or -1 from a signed compare of the first
 *        differing pair (first string's element against the second's).
 * Saves: %esi, %edi (pushed by ENTRANCE, popped by RETURN).
 * Uses:  %ecx, %edx, %xmm0-%xmm5, flags.
 *
 * This implementation uses SSE to compare up to 16 bytes at a time.
 */
	.text
ENTRY (wcscmp)
	mov	STR1(%esp), %edx	/* %edx = first string */
	mov	STR2(%esp), %eax	/* %eax = second string */

	/* Compare the first four elements one at a time.  Nothing has been
	   pushed yet, so these exits use L(neq)/L(eq), which return with a
	   plain `ret`. */
	mov	(%eax), %ecx
	cmp	%ecx, (%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	mov	4(%eax), %ecx
	cmp	%ecx, 4(%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	mov	8(%eax), %ecx
	cmp	%ecx, 8(%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	mov	12(%eax), %ecx
	cmp	%ecx, 12(%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	ENTRANCE			/* save %esi and %edi */
	add	$16, %eax		/* step past the four elements just compared */
	add	$16, %edx

	mov	%eax, %esi		/* %esi = cursor in the second string */
	mov	%edx, %edi		/* %edi = cursor in the first string */
	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
	mov	%al, %ch		/* %ch = low byte of %esi */
	mov	%dl, %cl		/* %cl = low byte of %edi */
	and	$63, %eax		/* esi alignment in cache line */
	and	$63, %edx		/* edi alignment in cache line */

	/* Dispatch on %edi first: 16-byte aligned, or which 16-byte quarter of
	   its 64-byte window it falls in. */
	and	$15, %cl
	jz	L(continue_00)		/* %edi is 16-byte aligned */
	cmp	$16, %edx
	jb	L(continue_0)
	cmp	$32, %edx
	jb	L(continue_16)
	cmp	$48, %edx
	jb	L(continue_32)
	/* %edx >= 48: fall through to the code that follows. */

/*
 * %edi is not 16-byte aligned and sits 48..63 bytes into its 64-byte window.
 * Choose the loop from %esi's position (%eax = %esi & 63, %ch = low byte of
 * %esi).  Two-number labels are shared by both pointer orders: for example
 * L(continue_0_48) is used when one offset is below 16 and the other is 48 or
 * more, whichever pointer is which.
 */
L(continue_48):
	and	$15, %ch
	jz	L(continue_48_00)	/* %esi is 16-byte aligned */
	cmp	$16, %eax
	jb	L(continue_0_48)
	cmp	$32, %eax
	jb	L(continue_16_48)
	cmp	$48, %eax
	jb	L(continue_32_48)

/*
 * Both offsets are 48..63.  Each 64-byte pass compares the first 16 bytes one
 * element at a time and the remaining 48 bytes with three unaligned SSE
 * groups.  %xmm0 is all-zero at the top of every SSE group: it was cleared at
 * entry and a group only falls through when it found no null element.
 */
	.p2align 4
L(continue_48_48):
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 16..31. */
	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words_16)

	/* Bytes 32..47. */
	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63. */
	movdqu	48(%edi), %xmm1
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_48)

/*
 * %edi is not 16-byte aligned and sits 1..15 bytes into its 64-byte window.
 * Choose the loop from %esi's position (%eax = %esi & 63, %ch = low byte of
 * %esi).
 */
L(continue_0):
	and	$15, %ch
	jz	L(continue_0_00)	/* %esi is 16-byte aligned */
	cmp	$16, %eax
	jb	L(continue_0_0)
	cmp	$32, %eax
	jb	L(continue_0_16)
	cmp	$48, %eax
	jb	L(continue_0_32)

/*
 * One offset is below 16, the other is 48..63.  Each 64-byte pass: bytes
 * 0..15 and 48..63 one element at a time, bytes 16..47 with two unaligned SSE
 * groups.  %xmm0 is all-zero at the top of every SSE group.
 */
	.p2align 4
L(continue_0_48):
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 16..31. */
	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words_16)

	/* Bytes 32..47. */
	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63, one element at a time. */
	mov	48(%esi), %ecx
	cmp	%ecx, 48(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	52(%esi), %ecx
	cmp	%ecx, 52(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	56(%esi), %ecx
	cmp	%ecx, 56(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	60(%esi), %ecx
	cmp	%ecx, 60(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_0_48)

/*
 * %edi is 16-byte aligned.  Choose the loop from %esi's position
 * (%eax = %esi & 63, %ch = low byte of %esi).
 */
	.p2align 4
L(continue_00):
	and	$15, %ch
	jz	L(continue_00_00)	/* %esi is 16-byte aligned as well */
	cmp	$16, %eax
	jb	L(continue_00_0)
	cmp	$32, %eax
	jb	L(continue_00_16)
	cmp	$48, %eax
	jb	L(continue_00_32)

/*
 * %edi aligned, %esi 48..63 bytes into its 64-byte window.  Each 64-byte
 * pass: the aligned block at (%edi) is scanned for a null element with one
 * SSE compare, then bytes 0..15 are compared one element at a time and bytes
 * 16..63 with three SSE groups.  %xmm0 is all-zero on entry and again after
 * every test that falls through.
 */
	.p2align 4
L(continue_00_48):
	pcmpeqd	(%edi), %xmm0		/* null double_word in the aligned block? */
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)	/* yes: finish element by element */

	/* No null in (%edi)..12(%edi), so equality alone decides. */
	cmp	(%esi), %eax
	jne	L(nequal)

	mov	4(%edi), %eax
	cmp	4(%esi), %eax
	jne	L(nequal)

	mov	8(%edi), %eax
	cmp	8(%esi), %eax
	jne	L(nequal)

	mov	12(%edi), %eax
	cmp	12(%esi), %eax
	jne	L(nequal)

	/* Bytes 16..31. */
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	16(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words_16)

	/* Bytes 32..47. */
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	32(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63. */
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	48(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_48)

/*
 * %edi is not 16-byte aligned and sits 32..47 bytes into its 64-byte window.
 * Choose the loop from %esi's position (%eax = %esi & 63, %ch = low byte of
 * %esi).
 */
	.p2align 4
L(continue_32):
	and	$15, %ch
	jz	L(continue_32_00)	/* %esi is 16-byte aligned */
	cmp	$16, %eax
	jb	L(continue_0_32)
	cmp	$32, %eax
	jb	L(continue_16_32)
	cmp	$48, %eax
	jb	L(continue_32_32)

/*
 * One offset is 32..47, the other 48..63.  Each 64-byte pass: bytes 0..31 one
 * element at a time, bytes 32..63 with two unaligned SSE groups.  %xmm0 is
 * all-zero at the top of every SSE group.
 */
	.p2align 4
L(continue_32_48):
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	16(%esi), %ecx
	cmp	%ecx, 16(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	20(%esi), %ecx
	cmp	%ecx, 20(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	24(%esi), %ecx
	cmp	%ecx, 24(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	28(%esi), %ecx
	cmp	%ecx, 28(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 32..47. */
	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words_32)

	/* Bytes 48..63. */
	movdqu	48(%edi), %xmm1
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_32_48)

/*
 * %edi is not 16-byte aligned and sits 16..31 bytes into its 64-byte window.
 * Choose the loop from %esi's position (%eax = %esi & 63, %ch = low byte of
 * %esi).
 */
	.p2align 4
L(continue_16):
	and	$15, %ch
	jz	L(continue_16_00)	/* %esi is 16-byte aligned */
	cmp	$16, %eax
	jb	L(continue_0_16)
	cmp	$32, %eax
	jb	L(continue_16_16)
	cmp	$48, %eax
	jb	L(continue_16_32)

/*
 * One offset is 16..31, the other 48..63.  Each 64-byte pass: bytes 0..15 and
 * 32..47 one element at a time, bytes 16..31 and 48..63 with unaligned SSE
 * groups.  %xmm0 is all-zero at the top of every SSE group.
 */
	.p2align 4
L(continue_16_48):
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 16..31. */
	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words_16)

	/* Bytes 32..47, one element at a time. */
	mov	32(%esi), %ecx
	cmp	%ecx, 32(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	36(%esi), %ecx
	cmp	%ecx, 36(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	40(%esi), %ecx
	cmp	%ecx, 40(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	44(%esi), %ecx
	cmp	%ecx, 44(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	/* Bytes 48..63. */
	movdqu	48(%edi), %xmm1
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_16_48)

/*
 * Both cursors are 16-byte aligned: four aligned SSE groups per 64-byte pass
 * (movdqa loads and memory-operand pcmpeqd).  %xmm0 is all-zero at the top of
 * every group.
 */
	.p2align 4
L(continue_00_00):
	movdqa	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	/* Bytes 16..31. */
	movdqa	16(%edi), %xmm3
	pcmpeqd	%xmm3, %xmm0
	pcmpeqd	16(%esi), %xmm3
	psubb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47. */
	movdqa	32(%edi), %xmm5
	pcmpeqd	%xmm5, %xmm0
	pcmpeqd	32(%esi), %xmm5
	psubb	%xmm0, %xmm5
	pmovmskb %xmm5, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63. */
	movdqa	48(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	48(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_00)

/*
 * %edi aligned, %esi 32..47 bytes into its 64-byte window.  One SSE group
 * moves %esi into the 48..63 range, then L(continue_00_48) takes over.
 */
	.p2align 4
L(continue_00_32):
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_00_48)

/*
 * %edi aligned, %esi 16..31 bytes into its 64-byte window.  Two SSE groups
 * move %esi into the 48..63 range, then L(continue_00_48) takes over.
 */
	.p2align 4
L(continue_00_16):
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	/* Bytes 16..31. */
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	16(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_00_48)

/*
 * %edi aligned, %esi 1..15 bytes into its 64-byte window.  Three SSE groups
 * move %esi into the 48..63 range, then L(continue_00_48) takes over.
 */
	.p2align 4
L(continue_00_0):
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	/* Bytes 16..31. */
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	16(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47. */
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0
	pcmpeqd	32(%edi), %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_00_48)

/*
 * %esi aligned, %edi 48..63 bytes into its 64-byte window.  Each 64-byte
 * pass: the aligned block at (%esi) is scanned for a null element with one
 * SSE compare, then bytes 0..15 are compared one element at a time and bytes
 * 16..63 with three SSE groups.  %xmm0 is all-zero on entry and again after
 * every test that falls through.
 */
	.p2align 4
L(continue_48_00):
	pcmpeqd	(%esi), %xmm0		/* null double_word in the aligned block? */
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)	/* yes: finish element by element */

	/* No null in (%esi)..12(%esi), so equality alone decides. */
	cmp	(%esi), %eax
	jne	L(nequal)

	mov	4(%edi), %eax
	cmp	4(%esi), %eax
	jne	L(nequal)

	mov	8(%edi), %eax
	cmp	8(%esi), %eax
	jne	L(nequal)

	mov	12(%edi), %eax
	cmp	12(%esi), %eax
	jne	L(nequal)

	/* Bytes 16..31. */
	movdqu	16(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	16(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words_16)

	/* Bytes 32..47. */
	movdqu	32(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	32(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	/* Bytes 48..63. */
	movdqu	48(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	48(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_00)

/*
 * %esi aligned, %edi 32..47 bytes into its 64-byte window.  One SSE group
 * moves %edi into the 48..63 range, then L(continue_48_00) takes over.
 */
	.p2align 4
L(continue_32_00):
	movdqu	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_00)

/*
 * %esi aligned, %edi 16..31 bytes into its 64-byte window.  Two SSE groups
 * move %edi into the 48..63 range, then L(continue_48_00) takes over.
 */
	.p2align 4
L(continue_16_00):
	movdqu	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	/* Bytes 16..31. */
	movdqu	16(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	16(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_00)

/*
 * %esi aligned, %edi 1..15 bytes into its 64-byte window.  Three SSE groups
 * move %edi into the 48..63 range, then L(continue_48_00) takes over.
 */
	.p2align 4
L(continue_0_00):
	movdqu	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	/* Bytes 16..31. */
	movdqu	16(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	16(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47. */
	movdqu	32(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	32(%esi), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_00)

/*
 * Both offsets are 32..47 (neither cursor aligned).  One SSE group moves both
 * into the 48..63 range, then L(continue_48_48) takes over.
 */
	.p2align 4
L(continue_32_32):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_48)

/*
 * Both offsets are 16..31 (neither cursor aligned).  Two SSE groups move both
 * into the 48..63 range, then L(continue_48_48) takes over.
 */
	.p2align 4
L(continue_16_16):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	/* Bytes 16..31. */
	movdqu	16(%edi), %xmm3
	movdqu	16(%esi), %xmm4
	pcmpeqd	%xmm3, %xmm0
	pcmpeqd	%xmm4, %xmm3
	psubb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_48)

/*
 * Both offsets are 1..15 (neither cursor aligned).  Three SSE groups move
 * both into the 48..63 range, then L(continue_48_48) takes over.
 */
	.p2align 4
L(continue_0_0):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	/* Bytes 16..31. */
	movdqu	16(%edi), %xmm3
	movdqu	16(%esi), %xmm4
	pcmpeqd	%xmm3, %xmm0
	pcmpeqd	%xmm4, %xmm3
	psubb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	/* Bytes 32..47. */
	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_48)

/*
 * One offset is 1..15, the other 16..31.  Two SSE groups move them to 32..47
 * and 48..63, then L(continue_32_48) takes over.
 */
	.p2align 4
L(continue_0_16):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	/* Bytes 16..31. */
	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0
	pcmpeqd	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_32_48)

/*
 * One offset is 1..15, the other 32..47.  One SSE group moves them to 16..31
 * and 48..63, then L(continue_16_48) takes over.
 */
	.p2align 4
L(continue_0_32):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_16_48)

/*
 * One offset is 16..31, the other 32..47.  One SSE group moves them to 32..47
 * and 48..63, then L(continue_32_48) takes over.
 */
	.p2align 4
L(continue_16_32):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* byte MSBs stay set only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* zero iff all 4 double_words are equal and non-null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_32_48)

/*
 * Tail for L(continue_00_48) and L(continue_48_00): the aligned 16-byte block
 * just scanned contains a null element.  On entry %eax = (%edi).  Elements
 * 0..2 are compared and null-tested in turn.  If all three are equal and
 * non-null the null must be element 3, so that one only needs the equality
 * compare before returning 0.
 */
	.p2align 4
L(less4_double_words1):
	cmp	(%esi), %eax
	jne	L(nequal)
	test	%eax, %eax
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	xor	%eax, %eax		/* equal, and terminated here */
	RETURN

/*
 * Tail for an SSE group at offset 0 that did not pass.  On entry
 * %edx = mask - 0xffff, where mask has four bits set for every element that
 * is equal and non-null.  The lowest set bit of %edx marks the first element
 * that differs or is null:
 *   %dl != 0, low nibble != 0   -> element 0
 *   %dl != 0, low nibble == 0   -> element 1
 *   %dl == 0, %dh nibble != 0   -> element 2
 *   otherwise                   -> element 3
 * That element is compared again: a mismatch goes to L(nequal) with the flags
 * from the cmp; otherwise both elements are null and 0 (already in %eax) is
 * returned.
 */
	.p2align 4
L(less4_double_words):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words)
	and	$15, %dl
	jz	L(second_double_word)
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(second_double_word):
	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)
	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(fourth_double_word):
	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	RETURN

/*
 * Tail for an SSE group at offset 16 that did not pass.  On entry
 * %edx = mask - 0xffff; its lowest set bit marks the first element of the
 * group that differs or is null (%dl covers elements at 16 and 20, %dh those
 * at 24 and 28).  That element is compared again: a mismatch goes to
 * L(nequal); otherwise both are null and 0 (already in %eax) is returned.
 */
	.p2align 4
L(less4_double_words_16):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_16)
	and	$15, %dl
	jz	L(second_double_word_16)
	mov	16(%esi), %ecx
	cmp	%ecx, 16(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(second_double_word_16):
	mov	20(%esi), %ecx
	cmp	%ecx, 20(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(next_two_double_words_16):
	and	$15, %dh
	jz	L(fourth_double_word_16)
	mov	24(%esi), %ecx
	cmp	%ecx, 24(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(fourth_double_word_16):
	mov	28(%esi), %ecx
	cmp	%ecx, 28(%edi)
	jne	L(nequal)
	RETURN

/*
 * Tail for an SSE group at offset 32 that did not pass.  On entry
 * %edx = mask - 0xffff; its lowest set bit marks the first element of the
 * group that differs or is null (%dl covers elements at 32 and 36, %dh those
 * at 40 and 44).  That element is compared again: a mismatch goes to
 * L(nequal); otherwise both are null and 0 (already in %eax) is returned.
 */
	.p2align 4
L(less4_double_words_32):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_32)
	and	$15, %dl
	jz	L(second_double_word_32)
	mov	32(%esi), %ecx
	cmp	%ecx, 32(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(second_double_word_32):
	mov	36(%esi), %ecx
	cmp	%ecx, 36(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(next_two_double_words_32):
	and	$15, %dh
	jz	L(fourth_double_word_32)
	mov	40(%esi), %ecx
	cmp	%ecx, 40(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(fourth_double_word_32):
	mov	44(%esi), %ecx
	cmp	%ecx, 44(%edi)
	jne	L(nequal)
	RETURN

/*
 * Tail for an SSE group at offset 48 that did not pass.  On entry
 * %edx = mask - 0xffff; its lowest set bit marks the first element of the
 * group that differs or is null (%dl covers elements at 48 and 52, %dh those
 * at 56 and 60).  That element is compared again: a mismatch goes to
 * L(nequal); otherwise both are null and 0 (already in %eax) is returned.
 */
	.p2align 4
L(less4_double_words_48):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_48)
	and	$15, %dl
	jz	L(second_double_word_48)
	mov	48(%esi), %ecx
	cmp	%ecx, 48(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(second_double_word_48):
	mov	52(%esi), %ecx
	cmp	%ecx, 52(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(next_two_double_words_48):
	and	$15, %dh
	jz	L(fourth_double_word_48)
	mov	56(%esi), %ecx
	cmp	%ecx, 56(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(fourth_double_word_48):
	mov	60(%esi), %ecx
	cmp	%ecx, 60(%edi)
	jne	L(nequal)
	RETURN

/*
 * Exits used once %esi/%edi have been pushed.
 * L(nequal) is entered with the flags of a compare of the first string's
 * element against the second's; `mov` leaves the flags intact, so `jg` picks
 * 1 when the first string's element is greater (signed) and -1 otherwise.
 */
	.p2align 4
L(nequal):
	mov	$1, %eax
	jg	L(return)
	neg	%eax			/* not greater: return -1 */
	RETURN

	.p2align 4
L(return):
	RETURN

	.p2align 4
L(equal):
	xorl	%eax, %eax		/* strings are equal: return 0 */
	RETURN

	/* The code that follows runs without %esi/%edi pushed, so undo the
	   unwind annotations left in effect by the RETURN above. */
	CFI_POP (%edi)
	CFI_POP (%esi)

/*
 * Exits for the first four elements, reached before anything is pushed, so
 * they return with a plain `ret`.  L(neq) is entered with the flags of a
 * compare of the first string's element against the second's: result is 1
 * when the first is greater (signed), -1 otherwise.  L(eq) returns 0.
 */
	.p2align 4
L(neq):
	mov	$1, %eax
	jg	L(neq_bigger)
	neg	%eax			/* not greater: return -1 */

L(neq_bigger):
	ret

	.p2align 4
L(eq):
	xorl	%eax, %eax
	ret

END (wcscmp)