/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.

* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
// ALGORITHM DESCRIPTION
// ---------------------
//
// X87 version:
// Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt.
//
// SSE version:
// Swap x, y if |x|<|y|
// For x=2^k*x, get y=y*2^(-k)
// Get S ~ sqrt(x^2+y^2) (leading 1 + leading 25 mantissa bits)
//
// Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) ) +
//         + ( y^2 - (RN(x^2+y^2)-RN(x^2)) )
//
// Result is 2^k*(S + Se), where Se = S*e
// S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S )
//
// Return 2^k*(S+Se)
//
// For |y/x|<2^(-64), return x
//
// For cases where maximum biased exponent is either greater than 7fdh or
// below 32, take a special path to check for special cases (0, NaN, Inf),
// possible overflow, and more accurate computation for denormal results
//
// Special cases:
// hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent
// hypot(x,+-0) is equivalent to fabs(x)
// hypot(x,y) = y if (x==NaN or x==INF) and y==INF
// hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!)
// hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF)
//
/******************************************************************************/

#include <private/bionic_asm.h>
# -- Begin hypot
//------------------------------------------------------------------------------
// double hypot(double x, double y)
//
// Returns sqrt(x*x + y*y). The squares, the sum and the square root are all
// evaluated on the x87 stack (the "X87 version" of the algorithm notes at the
// top of this file); SSE is only used to classify the operands.
//
// In:    %xmm0 = x, %xmm1 = y
// Out:   %xmm0 = result
// Uses:  %eax, %ecx, %edx, %xmm0-%xmm3, flags, x87 stack (net depth unchanged)
//
// Scratch frame (64 bytes, addressed from %rsp after the subq below):
//    0(%rsp)  result rounded to double (range-check path only)
//   16(%rsp)  80-bit spill of the result; later the final double result
//   32(%rsp)  |x|
//   40(%rsp)  |y|
//   48(%rsp)  x as passed in
//   56(%rsp)  y as passed in
//
// "hi(v)" in the comments below means bits 63:48 of the double v, i.e. the
// 11-bit exponent field followed by the top 4 mantissa bits (sign is cleared).
//------------------------------------------------------------------------------
ENTRY(hypot)
# parameter 1: %xmm0
# parameter 2: %xmm1
..B1.1:
..___tag_value_hypot.1:
..B1.2:
        subq      $64, %rsp                         // allocate the scratch frame
// Anchor for the hand-written unwind table at the end of this file: first
// address at which the 64-byte frame exists.
..___tag_value_hypot.3:
        movapd    static_const_table(%rip), %xmm3   // table +0: mask that clears the sign bit
        movsd     %xmm0, 48(%rsp)                   // keep x as passed in
        movsd     %xmm1, 56(%rsp)                   // keep y as passed in
        andpd     %xmm3, %xmm0                      // xmm0 = |x|
        andpd     %xmm3, %xmm1                      // xmm1 = |y|
        pextrw    $3, %xmm0, %eax                   // eax = hi(|x|)
        pextrw    $3, %xmm1, %edx                   // edx = hi(|y|)
        cmpl      $24528, %eax                      // 24528 = 0x5fd0
        ja        .L_2TAG_PACKET_0.0.1              // hi(|x|) above the fast-path limit
        cmpl      $24528, %edx
        ja        .L_2TAG_PACKET_0.0.1              // hi(|y|) above the fast-path limit

// Fast path: sqrt(x*x + y*y) on the x87 stack, no range check afterwards.
.L_2TAG_PACKET_1.0.1:
        fldl      48(%rsp)                          // st0 = x
        fldl      56(%rsp)                          // st0 = y,   st1 = x
        fxch      %st(1)                            // st0 = x,   st1 = y
        fmul      %st(0), %st                       // st0 = x*x
        fxch      %st(1)                            // st0 = y,   st1 = x*x
        nop
        fmul      %st(0), %st                       // st0 = y*y
        faddp     %st, %st(1)                       // st0 = x*x + y*y (one entry left)
        fsqrt                                       // st0 = sqrt(x*x + y*y)
        jmp       .L_2TAG_PACKET_2.0.1

// At least one of hi(|x|), hi(|y|) is above 0x5fd0.
.L_2TAG_PACKET_0.0.1:
        cmpl      $32752, %eax                      // 32752 = 0x7ff0: exponent field all ones
        movl      %eax, %ecx                        // mov leaves the flags of the cmpl intact
        jae       .L_2TAG_PACKET_3.0.1              // x is Inf or NaN
        subl      %edx, %ecx                        // ecx = hi(|x|) - hi(|y|)
        cmpl      $32752, %edx
        jae       .L_2TAG_PACKET_3.0.1              // y is Inf or NaN
        addl      $928, %ecx                        // bias by 928 = 0x3a0 (58 in the exponent field)
        addl      %edx, %eax                        // eax = hi(|x|) + hi(|y|)
        cmpl      $1856, %ecx                       // 1856 = 2*928: unsigned test of -928 <= diff <= 928
        ja        .L_2TAG_PACKET_4.0.1              // high words differ by more than 928
        cmpl      $49056, %eax                      // 49056 = 0xbfa0 = 2 * 0x5fd0
        jb        .L_2TAG_PACKET_1.0.1              // sum of high words below the limit: fast path
        // Same computation as the fast path, but followed by the range check.
        fldl      48(%rsp)                          // st0 = x
        fldl      56(%rsp)                          // st0 = y,   st1 = x
        fxch      %st(1)                            // st0 = x,   st1 = y
        fmul      %st(0), %st                       // st0 = x*x
        fxch      %st(1)                            // st0 = y,   st1 = x*x
        nop
        fmul      %st(0), %st                       // st0 = y*y
        faddp     %st, %st(1)                       // st0 = x*x + y*y
        fsqrt                                       // st0 = sqrt(x*x + y*y)

// Range check; st0 holds the extended-precision result.
.L_2TAG_PACKET_5.0.1:
        fstl      (%rsp)                            // 0(%rsp) = st0 rounded to double, st0 kept
        fstpt     16(%rsp)                          // 16(%rsp) = 80-bit st0, pop (x87 stack empty)
        xorl      %eax, %eax
        movw      24(%rsp), %ax                     // eax = sign/exponent word of the 80-bit value
        cmpl      $17407, %eax                      // 17407 = 0x43ff = extended bias 0x3fff + 1024
        jae       .L_2TAG_PACKET_6.0.1              // magnitude is at least 2^1024
        fldl      (%rsp)                            // st0 = result as rounded to double
        jmp       .L_2TAG_PACKET_7.0.1

// High words differ by more than 928: the result is computed as |x| + |y|.
.L_2TAG_PACKET_4.0.1:
        movsd     %xmm0, 32(%rsp)                   // 32(%rsp) = |x|
        movsd     %xmm1, 40(%rsp)                   // 40(%rsp) = |y|
        fldl      32(%rsp)
        faddl     40(%rsp)                          // st0 = |x| + |y|
        jmp       .L_2TAG_PACKET_5.0.1

// Out-of-range branch of the check above. It executes the same instructions as
// the in-range branch: the double written by fstl is reloaded and returned.
.L_2TAG_PACKET_6.0.1:
        fldl      (%rsp)
        jmp       .L_2TAG_PACKET_7.0.1

// At least one operand is Inf or NaN.
.L_2TAG_PACKET_3.0.1:
        shufpd    $0, %xmm1, %xmm0                  // xmm0 = { |x| (low), |y| (high) }
        movdqa    %xmm0, %xmm2
        movdqa    16+static_const_table(%rip), %xmm3 // table +16: +Inf in both lanes
        movsd     %xmm0, 32(%rsp)                   // 32(%rsp) = |x|
        movsd     %xmm1, 40(%rsp)                   // 40(%rsp) = |y|
        cmppd     $3, %xmm0, %xmm2                  // predicate 3 (unordered) of a lane with itself: NaN mask
        cmppd     $0, %xmm0, %xmm3                  // predicate 0 (equal) against +Inf: Inf mask
        movmskpd  %xmm2, %edx                       // edx bit0 = x is NaN, bit1 = y is NaN
        movmskpd  %xmm3, %rax                       // rax bit0 = x is Inf, bit1 = y is Inf
        testl     %edx, %edx
        je        .L_2TAG_PACKET_8.0.1              // no NaN involved
        fldl      32(%rsp)
        fmull     40(%rsp)                          // st0 = |x| * |y| (NaN operand present)
        testq     $1, %rax
        jne       .L_2TAG_PACKET_9.0.1              // x is Inf: return |x| despite the NaN
        testq     $2, %rax
        jne       .L_2TAG_PACKET_10.0.1             // y is Inf: return |y| despite the NaN
        jmp       .L_2TAG_PACKET_2.0.1              // otherwise return the product

// No NaN: return |x| + |y|.
.L_2TAG_PACKET_8.0.1:
        fldl      32(%rsp)
        faddl     40(%rsp)
        jmp       .L_2TAG_PACKET_2.0.1

// x is Inf, y is NaN: discard the product and return |x|.
.L_2TAG_PACKET_9.0.1:
        fstpl     40(%rsp)                          // pop the product (overwrites the |y| slot)
        fldl      32(%rsp)                          // st0 = |x|
        jmp       .L_2TAG_PACKET_7.0.1

// y is Inf, x is NaN: discard the product and return |y|.
.L_2TAG_PACKET_10.0.1:
        fstpl     32(%rsp)                          // pop the product (overwrites the |x| slot)
        fldl      40(%rsp)                          // st0 = |y|
        jmp       .L_2TAG_PACKET_7.0.1

// Common exit: st0 holds the result.
.L_2TAG_PACKET_2.0.1:
.L_2TAG_PACKET_7.0.1:
        fstpl     16(%rsp)                          // round to double and pop
        movq      16(%rsp), %xmm0                   // return value
        addq      $64, %rsp                         // release the scratch frame
        ret
..B1.3:
..___tag_value_hypot.4:
END(hypot)
# -- End hypot
// Read-only constants for hypot. The table is loaded with movapd/movdqa, so it
// must stay 16-byte aligned. Each 16-byte row holds the same 64-bit value in
// both lanes, written as little-endian (low .long, high .long) pairs.
        .section .rodata, "a"
        .align 16
static_const_table:
        // +0: 0x7fffffffffffffff x 2 -- every bit set except the sign bit
        .long 0xffffffff
        .long 0x7fffffff
        .long 0xffffffff
        .long 0x7fffffff
        // +16: 0x7ff0000000000000 x 2 -- bit pattern of +Inf (double)
        .long 0x00000000
        .long 0x7ff00000
        .long 0x00000000
        .long 0x7ff00000
        .type static_const_table,@object
        .size static_const_table,32
.data
|
|
.section .note.GNU-stack, ""
|
|
// -- Begin DWARF2 SEGMENT .eh_frame
|
|
.section .eh_frame,"a",@progbits
|
|
.eh_frame_seg:
|
|
.align 1
|
|
.4byte 0x00000014
|
|
.8byte 0x00527a0100000000
|
|
.8byte 0x08070c1b01107801
|
|
.4byte 0x00000190
|
|
.4byte 0x00000014
|
|
.4byte 0x0000001c
|
|
.4byte ..___tag_value_hypot.1-.
|
|
.4byte ..___tag_value_hypot.4-..___tag_value_hypot.1
|
|
.2byte 0x0400
|
|
.4byte ..___tag_value_hypot.3-..___tag_value_hypot.1
|
|
.2byte 0x100e
|
|
# End
|