--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// The functions in this group are declared without Go bodies.
+//
+// NOTE(review): each xxxTrampolineSetup is presumably the initial dispatch
+// target that picks between the portable Go routine and xxxAsm based on
+// hasVX; only the log10 and cos trampolines are visible in this change,
+// so confirm the remaining ones follow the same scheme.
+func log10TrampolineSetup(x float64) float64
+func log10Asm(x float64) float64
+
+func cosTrampolineSetup(x float64) float64
+func cosAsm(x float64) float64
+
+func coshTrampolineSetup(x float64) float64
+func coshAsm(x float64) float64
+
+func sinTrampolineSetup(x float64) float64
+func sinAsm(x float64) float64
+
+func sinhTrampolineSetup(x float64) float64
+func sinhAsm(x float64) float64
+
+func tanhTrampolineSetup(x float64) float64
+func tanhAsm(x float64) float64
+
+// hasVectorFacility reports whether the machine has the z/Architecture
+// vector facility installed and enabled.
+// It is declared here without a Go body.
+func hasVectorFacility() bool
+
+// hasVX holds the result of hasVectorFacility, evaluated once when the
+// package-level variables are initialized.
+var hasVX = hasVectorFacility()
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests that the non-vector routines still work when the tests are run on a
+// vector-capable machine.
+package math_test
+
+import (
+ . "math"
+ "testing"
+)
+
+// TestCosNovec compares CosNoVec with the cos and cosSC reference tables.
+// The test is skipped when HasVX is false.
+func TestCosNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	// Regular inputs must agree to within veryclose.
+	for i, x := range vf {
+		got := CosNoVec(x)
+		if veryclose(cos[i], got) {
+			continue
+		}
+		t.Errorf("Cos(%g) = %g, want %g", x, got, cos[i])
+	}
+	// Special-case inputs must be alike.
+	for i, x := range vfcosSC {
+		got := CosNoVec(x)
+		if alike(cosSC[i], got) {
+			continue
+		}
+		t.Errorf("Cos(%g) = %g, want %g", x, got, cosSC[i])
+	}
+}
+
+// TestCoshNovec compares CoshNoVec with the cosh and coshSC reference
+// tables. The test is skipped when HasVX is false.
+func TestCoshNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	// Regular inputs must agree to within close.
+	for i, x := range vf {
+		got := CoshNoVec(x)
+		if close(cosh[i], got) {
+			continue
+		}
+		t.Errorf("Cosh(%g) = %g, want %g", x, got, cosh[i])
+	}
+	// Special-case inputs must be alike.
+	for i, x := range vfcoshSC {
+		got := CoshNoVec(x)
+		if alike(coshSC[i], got) {
+			continue
+		}
+		t.Errorf("Cosh(%g) = %g, want %g", x, got, coshSC[i])
+	}
+}
+// TestSinNovec compares SinNoVec with the sin and sinSC reference tables.
+// The test is skipped when HasVX is false.
+func TestSinNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	// Regular inputs must agree to within veryclose.
+	for i, x := range vf {
+		got := SinNoVec(x)
+		if veryclose(sin[i], got) {
+			continue
+		}
+		t.Errorf("Sin(%g) = %g, want %g", x, got, sin[i])
+	}
+	// Special-case inputs must be alike.
+	for i, x := range vfsinSC {
+		got := SinNoVec(x)
+		if alike(sinSC[i], got) {
+			continue
+		}
+		t.Errorf("Sin(%g) = %g, want %g", x, got, sinSC[i])
+	}
+}
+
+// TestSinhNovec compares SinhNoVec with the sinh and sinhSC reference
+// tables. The test is skipped when HasVX is false.
+func TestSinhNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	// Regular inputs must agree to within close.
+	for i, x := range vf {
+		got := SinhNoVec(x)
+		if close(sinh[i], got) {
+			continue
+		}
+		t.Errorf("Sinh(%g) = %g, want %g", x, got, sinh[i])
+	}
+	// Special-case inputs must be alike.
+	for i, x := range vfsinhSC {
+		got := SinhNoVec(x)
+		if alike(sinhSC[i], got) {
+			continue
+		}
+		t.Errorf("Sinh(%g) = %g, want %g", x, got, sinhSC[i])
+	}
+}
+
+// Check that math functions of high angle values
+// return accurate results. [Since (vf[i] + large) - large != vf[i],
+// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is
+// a multiple of 2*Pi, is misleading.]
+func TestLargeCosNovec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ large := float64(100000 * Pi)
+ for i := 0; i < len(vf); i++ {
+ f1 := cosLarge[i]
+ f2 := CosNoVec(vf[i] + large)
+ if !close(f1, f2) {
+ t.Errorf("Cos(%g) = %g, want %g", vf[i]+large, f2, f1)
+ }
+ }
+}
+
+func TestLargeSinNovec(t *testing.T) {
+ if !HasVX {
+ t.Skipf("no vector support")
+ }
+ large := float64(100000 * Pi)
+ for i := 0; i < len(vf); i++ {
+ f1 := sinLarge[i]
+ f2 := SinNoVec(vf[i] + large)
+ if !close(f1, f2) {
+ t.Errorf("Sin(%g) = %g, want %g", vf[i]+large, f2, f1)
+ }
+ }
+}
+
+// TestTanhNovec compares TanhNoVec with the tanh and tanhSC reference
+// tables. The test is skipped when HasVX is false.
+func TestTanhNovec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	// Regular inputs must agree to within veryclose.
+	for i, x := range vf {
+		got := TanhNoVec(x)
+		if veryclose(tanh[i], got) {
+			continue
+		}
+		t.Errorf("Tanh(%g) = %g, want %g", x, got, tanh[i])
+	}
+	// Special-case inputs must be alike.
+	for i, x := range vftanhSC {
+		got := TanhNoVec(x)
+		if alike(tanhSC[i], got) {
+			continue
+		}
+		t.Errorf("Tanh(%g) = %g, want %g", x, got, tanhSC[i])
+	}
+}
+
+// TestLog10Novec compares Log10NoVec with the log10 and logSC reference
+// tables and checks Log10NoVec(E) against Log10E exactly.
+// The test is skipped when HasVX is false.
+func TestLog10Novec(t *testing.T) {
+	if !HasVX {
+		t.Skipf("no vector support")
+	}
+	// The regular table is evaluated at |vf[i]|.
+	for i, v := range vf {
+		arg := Abs(v)
+		got := Log10NoVec(arg)
+		if veryclose(log10[i], got) {
+			continue
+		}
+		t.Errorf("Log10(%g) = %g, want %g", arg, got, log10[i])
+	}
+	if got := Log10NoVec(E); got != Log10E {
+		t.Errorf("Log10(%g) = %g, want %g", E, got, Log10E)
+	}
+	// Special-case inputs must be alike.
+	for i, arg := range vflogSC {
+		got := Log10NoVec(arg)
+		if alike(logSC[i], got) {
+			continue
+		}
+		t.Errorf("Log10(%g) = %g, want %g", arg, got, logSC[i])
+	}
+}
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Constants used by coshAsm.
+DATA coshrodataL23<>+0(SB)/8, $0.231904681384629956E-16
+DATA coshrodataL23<>+8(SB)/8, $0.693147180559945286E+00 // numerically close to ln(2)
+DATA coshrodataL23<>+16(SB)/8, $0.144269504088896339E+01 // numerically close to 1/ln(2)
+DATA coshrodataL23<>+24(SB)/8, $704.E0 // threshold read by the cdb at displacement 24 in coshAsm
+GLOBL coshrodataL23<>+0(SB), RODATA, $32
+DATA coshxinf<>+0(SB)/8, $0x7FF0000000000000 // bit pattern of +Inf
+GLOBL coshxinf<>+0(SB), RODATA, $8
+DATA coshxlim1<>+0(SB)/8, $800.E0 // second threshold, checked before returning coshxinf
+GLOBL coshxlim1<>+0(SB), RODATA, $8
+DATA coshxaddhy<>+0(SB)/8, $0xc2f0000100003fdf
+GLOBL coshxaddhy<>+0(SB), RODATA, $8
+DATA coshx4ff<>+0(SB)/8, $0x4ff0000000000000 // bit pattern of 2**256
+GLOBL coshx4ff<>+0(SB), RODATA, $8
+DATA coshe1<>+0(SB)/8, $0x3ff000000000000a
+GLOBL coshe1<>+0(SB), RODATA, $8
+
+// Log multiplier table
+// 16 entries of 8 bytes, indexed by coshAsm through the ld instructions.
+// NOTE(review): the heading says "Log" although the table is only used by
+// coshAsm here - confirm the description.
+DATA coshtab<>+0(SB)/8, $0.442737824274138381E-01
+DATA coshtab<>+8(SB)/8, $0.263602189790660309E-01
+DATA coshtab<>+16(SB)/8, $0.122565642281703586E-01
+DATA coshtab<>+24(SB)/8, $0.143757052860721398E-02
+DATA coshtab<>+32(SB)/8, $-.651375034121276075E-02
+DATA coshtab<>+40(SB)/8, $-.119317678849450159E-01
+DATA coshtab<>+48(SB)/8, $-.150868749549871069E-01
+DATA coshtab<>+56(SB)/8, $-.161992609578469234E-01
+DATA coshtab<>+64(SB)/8, $-.154492360403337917E-01
+DATA coshtab<>+72(SB)/8, $-.129850717389178721E-01
+DATA coshtab<>+80(SB)/8, $-.892902649276657891E-02
+DATA coshtab<>+88(SB)/8, $-.338202636596794887E-02
+DATA coshtab<>+96(SB)/8, $0.357266307045684762E-02
+DATA coshtab<>+104(SB)/8, $0.118665304327406698E-01
+DATA coshtab<>+112(SB)/8, $0.214434994118118914E-01
+DATA coshtab<>+120(SB)/8, $0.322580645161290314E-01
+GLOBL coshtab<>+0(SB), RODATA, $128
+
+// Minimax polynomial approximations
+// (coefficients coshe2..coshe6; each is its own 8-byte read-only symbol).
+DATA coshe2<>+0(SB)/8, $0.500000000000004237e+00
+GLOBL coshe2<>+0(SB), RODATA, $8
+DATA coshe3<>+0(SB)/8, $0.166666666630345592e+00
+GLOBL coshe3<>+0(SB), RODATA, $8
+DATA coshe4<>+0(SB)/8, $0.416666664838056960e-01
+GLOBL coshe4<>+0(SB), RODATA, $8
+DATA coshe5<>+0(SB)/8, $0.833349307718286047e-02
+GLOBL coshe5<>+0(SB), RODATA, $8
+DATA coshe6<>+0(SB)/8, $0.138926439368309441e-02
+GLOBL coshe6<>+0(SB), RODATA, $8
+
+// Cosh returns the hyperbolic cosine of x.
+//
+// Special cases are:
+// Cosh(±0) = 1
+// Cosh(±Inf) = +Inf
+// Cosh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+//
+// coshAsm reads x from x+0(FP) and writes its result to ret+8(FP).
+// Instructions given as raw WORD/BYTE data have their mnemonic in the
+// trailing comment.
+
+TEXT ·coshAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0 // F0 = x
+ MOVD $coshrodataL23<>+0(SB), R9 // R9 = base of coshrodataL23 for the cdb below
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ MOVD $0x4086000000000000, R2
+ MOVD $0x4086000000000000, R3
+ BLTU L19 // taken for x < 0 (or unordered): L19 sets F4 = -x
+ FMOVD F0, F4 // otherwise F4 = x
+L2:
+ WORD $0xED409018 //cdb %f4,.L24-.L23(%r9) - displacement 0x018: compares F4 with coshrodataL23+24 (704.0)
+ BYTE $0x00
+ BYTE $0x19
+ BGE L14 //jnl .L14
+ BVS L14
+ WFCEDBS V4, V4, V2 // compare F4 with itself
+ BEQ L20 // equal: take the main evaluation path at L20
+L1:
+ FMOVD F0, ret+8(FP) // F0 has not been modified on the paths reaching L1: return x itself
+ RET
+
+L14:
+ WFCEDBS V4, V4, V2 // compare F4 with itself
+ BVS L1
+ MOVD $coshxlim1<>+0(SB), R1
+ FMOVD 0(R1), F2 // F2 = 800.0
+ WFCHEDBS V4, V2, V2 // compare F4 against 800.0
+ BEQ L21 // L21 returns coshxinf (+Inf)
+ MOVD $coshxaddhy<>+0(SB), R1
+ FMOVD coshrodataL23<>+16(SB), F5
+ FMOVD 0(R1), F2
+ WFMSDB V0, V5, V2, V5
+ FMOVD coshrodataL23<>+8(SB), F3
+ FADD F5, F2
+ MOVD $coshe6<>+0(SB), R1
+ WFMSDB V2, V3, V0, V3
+ FMOVD 0(R1), F6
+ WFMDB V3, V3, V1
+ MOVD $coshe4<>+0(SB), R1
+ FMOVD coshrodataL23<>+0(SB), F7
+ WFMADB V2, V7, V3, V2
+ FMOVD 0(R1), F3
+ MOVD $coshe5<>+0(SB), R1
+ WFMADB V1, V6, V3, V6
+ FMOVD 0(R1), F7
+ MOVD $coshe3<>+0(SB), R1
+ FMOVD 0(R1), F3
+ WFMADB V1, V7, V3, V7
+ FNEG F2, F3
+ WORD $0xB3CD0015 //lgdr %r1,%f5
+ MOVD $coshe2<>+0(SB), R3
+ WFCEDBS V4, V0, V0
+ FMOVD 0(R3), F5
+ MOVD $coshe1<>+0(SB), R3
+ WFMADB V1, V6, V5, V6
+ FMOVD 0(R3), F5
+ WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WFMADB V1, V7, V5, V1
+ BVS L22
+ WORD $0xEC4139BC //risbg %r4,%r1,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55
+ MOVD $coshtab<>+0(SB), R3 // R3 = base of coshtab
+ WFMADB V3, V6, V1, V6
+ WORD $0x68043000 //ld %f0,0(%r4,%r3)
+ FMSUB F0, F3, F2, F2
+ WORD $0xA71AF000 //ahi %r1,-4096
+ WFMADB V2, V6, V0, V6
+L17:
+ WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10022 //ldgr %f2,%r2
+ FMADD F2, F6, F2, F2
+ MOVD $coshx4ff<>+0(SB), R1
+ FMOVD 0(R1), F0 // F0 = coshx4ff constant
+ FMUL F2, F0 // final scaling by that constant
+ FMOVD F0, ret+8(FP)
+ RET
+
+L19:
+ FNEG F0, F4 // F4 = -x
+ BR L2
+L20:
+ MOVD $coshxaddhy<>+0(SB), R1
+ FMOVD coshrodataL23<>+16(SB), F3
+ FMOVD 0(R1), F2
+ WFMSDB V0, V3, V2, V3
+ FMOVD coshrodataL23<>+8(SB), F4
+ FADD F3, F2
+ MOVD $coshe6<>+0(SB), R1
+ FMSUB F4, F2, F0, F0
+ FMOVD 0(R1), F6
+ WFMDB V0, V0, V1
+ MOVD $coshe4<>+0(SB), R1
+ FMOVD 0(R1), F4
+ MOVD $coshe5<>+0(SB), R1
+ FMOVD coshrodataL23<>+0(SB), F5
+ WFMADB V1, V6, V4, V6
+ FMADD F5, F2, F0, F0
+ FMOVD 0(R1), F2
+ MOVD $coshe3<>+0(SB), R1
+ FMOVD 0(R1), F4
+ WFMADB V1, V2, V4, V2
+ MOVD $coshe2<>+0(SB), R1
+ FMOVD 0(R1), F5
+ FNEG F0, F4
+ WFMADB V1, V6, V5, V6
+ MOVD $coshe1<>+0(SB), R1
+ FMOVD 0(R1), F5
+ WFMADB V1, V2, V5, V1
+ WORD $0xB3CD0013 //lgdr %r1,%f3
+ MOVD $coshtab<>+0(SB), R5 // R5 = base of coshtab
+ WFMADB V4, V6, V1, V3
+ WORD $0xEC4139BC //risbg %r4,%r1,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55
+ WFMSDB V4, V6, V1, V6
+ WORD $0x68145000 //ld %f1,0(%r4,%r5)
+ WFMSDB V4, V1, V0, V2
+ WORD $0xA7487FBE //lhi %r4,32702
+ FMADD F3, F2, F1, F1
+ SUBW R1, R4
+ WORD $0xECC439BC //risbg %r12,%r4,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55
+ WORD $0x682C5000 //ld %f2,0(%r12,%r5)
+ FMSUB F2, F4, F0, F0
+ WORD $0xEC21000F //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WFMADB V0, V6, V2, V6
+ WORD $0xEC34000F //risbgn %r3,%r4,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10022 //ldgr %f2,%r2
+ WORD $0xB3C10003 //ldgr %f0,%r3
+ FMADD F2, F1, F2, F2
+ FMADD F0, F6, F0, F0
+ FADD F2, F0 // sum the two partial results into F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L22:
+ WORD $0xA7387FBE //lhi %r3,32702
+ MOVD $coshtab<>+0(SB), R4 // R4 = base of coshtab
+ SUBW R1, R3
+ WFMSDB V3, V6, V1, V6
+ WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55
+ WORD $0x68034000 //ld %f0,0(%r3,%r4)
+ FMSUB F0, F3, F2, F2
+ WORD $0xA7386FBE //lhi %r3,28606
+ WFMADB V2, V6, V0, V6
+ SUBW R1, R3, R1
+ BR L17 // rejoin the common scaling code at L17
+L21:
+ MOVD $coshxinf<>+0(SB), R1
+ FMOVD 0(R1), F0 // F0 = +Inf
+ FMOVD F0, ret+8(FP)
+ RET
+
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Test hooks: expose the unexported Go implementations and the hasVX
+// flag under exported names so that external tests can reach them.
+var (
+	Log10NoVec = log10
+	CosNoVec   = cos
+	CoshNoVec  = cosh
+	SinNoVec   = sin
+	SinhNoVec  = sinh
+	TanhNoVec  = tanh
+	HasVX      = hasVX
+)
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Minimax polynomial coefficients and other constants
+// (17 doublewords; log10Asm addresses them by byte offset).
+DATA log10rodataL19<>+0(SB)/8, $0.000000000000000000E+00 // divisor used by the ddb at L18 in log10Asm
+DATA log10rodataL19<>+8(SB)/8, $-1.0 // dividend loaded at L18 in log10Asm
+DATA log10rodataL19<>+16(SB)/8, $0x7FF8000000000000 // quiet NaN bit pattern
+DATA log10rodataL19<>+24(SB)/8, $.15375570329280596749
+DATA log10rodataL19<>+32(SB)/8, $.60171950900703668594E+04
+DATA log10rodataL19<>+40(SB)/8, $-1.9578460454940795898
+DATA log10rodataL19<>+48(SB)/8, $0.78962633073318517310E-01
+DATA log10rodataL19<>+56(SB)/8, $-.71784211884836937993E-02
+DATA log10rodataL19<>+64(SB)/8, $0.87011165920689940661E-03
+DATA log10rodataL19<>+72(SB)/8, $-.11865158981621437541E-03
+DATA log10rodataL19<>+80(SB)/8, $0.17258413403018680410E-04
+DATA log10rodataL19<>+88(SB)/8, $0.40752932047883484315E-06
+DATA log10rodataL19<>+96(SB)/8, $-.26149194688832680410E-05
+DATA log10rodataL19<>+104(SB)/8, $0.92453396963875026759E-08
+DATA log10rodataL19<>+112(SB)/8, $-.64572084905921579630E-07
+DATA log10rodataL19<>+120(SB)/8, $-5.5
+DATA log10rodataL19<>+128(SB)/8, $18446744073709551616. // 2**64, multiplier used by the mdb in log10Asm
+GLOBL log10rodataL19<>+0(SB), RODATA, $136
+
+// Table of log10 correction terms
+// (16 entries of 8 bytes, read by log10Asm through the indexed ld).
+DATA log10tab2074<>+0(SB)/8, $0.254164497922885069E-01
+DATA log10tab2074<>+8(SB)/8, $0.179018857989381839E-01
+DATA log10tab2074<>+16(SB)/8, $0.118926768029048674E-01
+DATA log10tab2074<>+24(SB)/8, $0.722595568238080033E-02
+DATA log10tab2074<>+32(SB)/8, $0.376393570022739135E-02
+DATA log10tab2074<>+40(SB)/8, $0.138901135928814326E-02
+DATA log10tab2074<>+48(SB)/8, $0
+DATA log10tab2074<>+56(SB)/8, $-0.490780466387818203E-03
+DATA log10tab2074<>+64(SB)/8, $-0.159811431402137571E-03
+DATA log10tab2074<>+72(SB)/8, $0.925796337165100494E-03
+DATA log10tab2074<>+80(SB)/8, $0.270683176738357035E-02
+DATA log10tab2074<>+88(SB)/8, $0.513079030821304758E-02
+DATA log10tab2074<>+96(SB)/8, $0.815089785397996303E-02
+DATA log10tab2074<>+104(SB)/8, $0.117253060262419215E-01
+DATA log10tab2074<>+112(SB)/8, $0.158164239345343963E-01
+DATA log10tab2074<>+120(SB)/8, $0.203903595489229786E-01
+GLOBL log10tab2074<>+0(SB), RODATA, $128
+
+// Log10 returns the decimal logarithm of the argument.
+//
+// Special cases are:
+// Log10(+Inf) = +Inf
+// Log10(0) = -Inf
+// Log10(x < 0) = NaN
+// Log10(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+//
+// log10Asm reads x from x+0(FP) and writes its result to ret+8(FP).
+// The 8-byte frame is scratch space used to move x from a floating-point
+// register into general registers. Instructions given as raw WORD/BYTE
+// data have their mnemonic in the trailing comment.
+
+TEXT ·log10Asm(SB),NOSPLIT,$8-16
+ FMOVD x+0(FP), F0 // F0 = x
+ MOVD $log10rodataL19<>+0(SB), R9 // R9 = base of log10rodataL19 for the mdb/ddb below
+ FMOVD F0, x-8(SP) // spill x to the frame
+ WORD $0xC0298006 //iilf %r2,2147909631
+ BYTE $0x7F
+ BYTE $0xFF
+ WORD $0x5840F008 //l %r4, 8(%r15) - 32-bit load; presumably the high word of the spilled x
+ SUBW R4, R2, R3
+ WORD $0xEC5320AF //risbg %r5,%r3,32,128+47,0
+ BYTE $0x00
+ BYTE $0x55
+ MOVH $0x0, R1
+ WORD $0xEC15001F //risbgn %r1,%r5,64-64+0,64-64+0+32-1,64-0-32
+ BYTE $0x20
+ BYTE $0x59
+ WORD $0xC0590016 //iilf %r5,1507327
+ BYTE $0xFF
+ BYTE $0xFF
+ MOVW R4, R10
+ MOVW R5, R11
+ CMPBLE R10, R11, L2 // R4 <= 1507327: handled at L2
+ WORD $0xC0297FEF //iilf %r2,2146435071
+ BYTE $0xFF
+ BYTE $0xFF
+ MOVW R4, R10
+ MOVW R2, R11
+ CMPBLE R10, R11, L16 // R4 <= 2146435071: handled at L16
+L3:
+L1:
+ FMOVD F0, ret+8(FP) // return whatever F0 currently holds
+ RET
+
+L2:
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ BLEU L13 // x <= 0 (or unordered): special cases at L13
+ WORD $0xED009080 //mdb %f0,.L20-.L19(%r9) - displacement 0x080: multiplies F0 by log10rodataL19+128 (2**64)
+ BYTE $0x00
+ BYTE $0x1C
+ FMOVD F0, x-8(SP) // spill the scaled value
+ WORD $0x5B20F008 //s %r2, 8(%r15)
+ WORD $0xEC3239BC //risbg %r3,%r2,57,128+60,64-13
+ BYTE $0x33
+ BYTE $0x55
+ ANDW $0xFFFF0000, R2
+ WORD $0xEC12001F //risbgn %r1,%r2,64-64+0,64-64+0+32-1,64-0-32
+ BYTE $0x20
+ BYTE $0x59
+ ADDW $0x4000000, R2
+ BLEU L17
+L8:
+ SRW $8, R2, R2
+ ORW $0x45000000, R2
+L4:
+ FMOVD log10rodataL19<>+120(SB), F2
+ WORD $0xB3C10041 //ldgr %f4,%r1
+ WFMADB V4, V0, V2, V0
+ FMOVD log10rodataL19<>+112(SB), F4
+ FMOVD log10rodataL19<>+104(SB), F6
+ WFMADB V0, V6, V4, V6
+ FMOVD log10rodataL19<>+96(SB), F4
+ FMOVD log10rodataL19<>+88(SB), F1
+ WFMADB V0, V1, V4, V1
+ WFMDB V0, V0, V4
+ FMOVD log10rodataL19<>+80(SB), F2
+ WFMADB V6, V4, V1, V6
+ FMOVD log10rodataL19<>+72(SB), F1
+ WFMADB V0, V2, V1, V2
+ FMOVD log10rodataL19<>+64(SB), F1
+ WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,0
+ BYTE $0x00
+ BYTE $0x55
+ WFMADB V4, V6, V2, V6
+ FMOVD log10rodataL19<>+56(SB), F2
+ WFMADB V0, V1, V2, V1
+ VLVGF $0, R2, V2
+ WFMADB V4, V6, V1, V4
+ LDEBR F2, F2
+ FMOVD log10rodataL19<>+48(SB), F6
+ WFMADB V0, V4, V6, V4
+ FMOVD log10rodataL19<>+40(SB), F1
+ FMOVD log10rodataL19<>+32(SB), F6
+ MOVD $log10tab2074<>+0(SB), R1 // R1 = base of the correction table
+ WFMADB V2, V1, V6, V2
+ WORD $0x68331000 //ld %f3,0(%r3,%r1)
+ WFMADB V0, V4, V3, V0
+ FMOVD log10rodataL19<>+24(SB), F4
+ FMADD F4, F2, F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L16:
+ WORD $0xEC2328B7 //risbg %r2,%r3,40,128+55,64-8
+ BYTE $0x38
+ BYTE $0x55
+ WORD $0xEC3339BC //risbg %r3,%r3,57,128+60,64-13
+ BYTE $0x33
+ BYTE $0x55
+ ORW $0x45000000, R2
+ BR L4 // join the common polynomial evaluation
+L13:
+ BGE L18 //jnl .L18
+ BVS L18
+ FMOVD log10rodataL19<>+16(SB), F0 // F0 = NaN constant
+ BR L1
+L17:
+ SRAW $1, R2, R2
+ SUBW $0x40000000, R2
+ BR L8
+L18:
+ FMOVD log10rodataL19<>+8(SB), F0 // F0 = -1.0
+ WORD $0xED009000 //ddb %f0,.L36-.L19(%r9) - displacement 0x000: divides F0 by log10rodataL19+0 (0.0)
+ BYTE $0x00
+ BYTE $0x1D
+ BR L1
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Various constants shared by sinAsm and cosAsm.
+DATA sincosxnan<>+0(SB)/8, $0x7ff8000000000000 // quiet NaN bit pattern
+GLOBL sincosxnan<>+0(SB), RODATA, $8
+DATA sincosxlim<>+0(SB)/8, $0x432921fb54442d19 // limit compared against the sign-adjusted argument before the polynomial paths
+GLOBL sincosxlim<>+0(SB), RODATA, $8
+DATA sincosxadd<>+0(SB)/8, $0xc338000000000000
+GLOBL sincosxadd<>+0(SB), RODATA, $8
+DATA sincosxpi2l<>+0(SB)/8, $0.108285667392191389e-31
+GLOBL sincosxpi2l<>+0(SB), RODATA, $8
+DATA sincosxpi2m<>+0(SB)/8, $0.612323399573676480e-16
+GLOBL sincosxpi2m<>+0(SB), RODATA, $8
+DATA sincosxpi2h<>+0(SB)/8, $0.157079632679489656e+01 // numerically close to Pi/2
+GLOBL sincosxpi2h<>+0(SB), RODATA, $8
+DATA sincosrpi2<>+0(SB)/8, $0.636619772367581341e+00 // numerically close to 2/Pi
+GLOBL sincosrpi2<>+0(SB), RODATA, $8
+
+// Minimax polynomial approximations
+// (two coefficient sets: sincosc0..sincosc7 and sincoss0..sincoss7).
+DATA sincosc0<>+0(SB)/8, $0.100000000000000000E+01
+GLOBL sincosc0<>+0(SB), RODATA, $8
+DATA sincosc1<>+0(SB)/8, $-.499999999999999833E+00
+GLOBL sincosc1<>+0(SB), RODATA, $8
+DATA sincosc2<>+0(SB)/8, $0.416666666666625843E-01
+GLOBL sincosc2<>+0(SB), RODATA, $8
+DATA sincosc3<>+0(SB)/8, $-.138888888885498984E-02
+GLOBL sincosc3<>+0(SB), RODATA, $8
+DATA sincosc4<>+0(SB)/8, $0.248015871681607202E-04
+GLOBL sincosc4<>+0(SB), RODATA, $8
+DATA sincosc5<>+0(SB)/8, $-.275572911309937875E-06
+GLOBL sincosc5<>+0(SB), RODATA, $8
+DATA sincosc6<>+0(SB)/8, $0.208735047247632818E-08
+GLOBL sincosc6<>+0(SB), RODATA, $8
+DATA sincosc7<>+0(SB)/8, $-.112753632738365317E-10
+GLOBL sincosc7<>+0(SB), RODATA, $8
+DATA sincoss0<>+0(SB)/8, $0.100000000000000000E+01
+GLOBL sincoss0<>+0(SB), RODATA, $8
+DATA sincoss1<>+0(SB)/8, $-.166666666666666657E+00
+GLOBL sincoss1<>+0(SB), RODATA, $8
+DATA sincoss2<>+0(SB)/8, $0.833333333333309209E-02
+GLOBL sincoss2<>+0(SB), RODATA, $8
+DATA sincoss3<>+0(SB)/8, $-.198412698410701448E-03
+GLOBL sincoss3<>+0(SB), RODATA, $8
+DATA sincoss4<>+0(SB)/8, $0.275573191453906794E-05
+GLOBL sincoss4<>+0(SB), RODATA, $8
+DATA sincoss5<>+0(SB)/8, $-.250520918387633290E-07
+GLOBL sincoss5<>+0(SB), RODATA, $8
+DATA sincoss6<>+0(SB)/8, $0.160571285514715856E-09
+GLOBL sincoss6<>+0(SB), RODATA, $8
+DATA sincoss7<>+0(SB)/8, $-.753213484933210972E-12
+GLOBL sincoss7<>+0(SB), RODATA, $8
+
+// Sin returns the sine of the radian argument x.
+//
+// Special cases are:
+// Sin(±0) = ±0
+// Sin(±Inf) = NaN
+// Sin(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+//
+// sinAsm reads x from x+0(FP) and writes its result to ret+8(FP).
+// Instructions given as raw WORD data have their mnemonic in the
+// trailing comment.
+
+TEXT ·sinAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0 // F0 = x
+ //special case Sin(±0) = ±0
+ FMOVD $(0.0), F1
+ FCMPU F0, F1
+ BEQ sinIsZero // x compares equal to 0: return x itself, keeping its sign
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ BLTU L17 // taken for x < 0 (or unordered): L17 sets F5 = -x
+ FMOVD F0, F5 // otherwise F5 = x
+L2:
+ MOVD $sincoss7<>+0(SB), R1
+ FMOVD 0(R1), F4
+ MOVD $sincoss6<>+0(SB), R1
+ FMOVD 0(R1), F1
+ MOVD $sincoss5<>+0(SB), R1
+ VLEG $0, 0(R1), V18
+ MOVD $sincoss4<>+0(SB), R1
+ FMOVD 0(R1), F6
+ MOVD $sincoss2<>+0(SB), R1
+ VLEG $0, 0(R1), V16
+ MOVD $sincoss3<>+0(SB), R1
+ FMOVD 0(R1), F7
+ MOVD $sincoss1<>+0(SB), R1
+ FMOVD 0(R1), F3
+ MOVD $sincoss0<>+0(SB), R1
+ FMOVD 0(R1), F2 // F2 = sincoss0 (1.0)
+ WFCHDBS V2, V5, V2 // compare 1.0 with F5
+ BEQ L18 // L18 evaluates the polynomial directly on x
+ MOVD $sincosrpi2<>+0(SB), R1
+ FMOVD 0(R1), F3
+ MOVD $sincosxadd<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFMSDB V0, V3, V2, V3
+ FMOVD 0(R1), F6
+ FADD F3, F6
+ MOVD $sincosxpi2h<>+0(SB), R1
+ FMOVD 0(R1), F2
+ FMSUB F2, F6, F0, F0
+ MOVD $sincosxpi2m<>+0(SB), R1
+ FMOVD 0(R1), F4
+ FMADD F4, F6, F0, F0
+ MOVD $sincosxpi2l<>+0(SB), R1
+ WFMDB V0, V0, V1
+ FMOVD 0(R1), F7
+ WFMDB V1, V1, V2
+ WORD $0xB3CD0013 //lgdr %r1,%f3
+ MOVD $sincosxlim<>+0(SB), R2
+ WORD $0xA7110001 //tmll %r1,1
+ BEQ L6 // bit 0 of R1 clear: use the sincoss coefficients at L6
+ FMOVD 0(R2), F0 // F0 = sincosxlim
+ WFCHDBS V0, V5, V0 // compare the limit with F5
+ BNE L14 // L14 returns NaN
+ MOVD $sincosc7<>+0(SB), R2
+ FMOVD 0(R2), F0
+ MOVD $sincosc6<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincosc5<>+0(SB), R2
+ WFMADB V1, V0, V4, V0
+ FMOVD 0(R2), F6
+ MOVD $sincosc4<>+0(SB), R2
+ WFMADB V1, V0, V6, V0
+ FMOVD 0(R2), F4
+ MOVD $sincosc2<>+0(SB), R2
+ FMOVD 0(R2), F6
+ WFMADB V2, V4, V6, V4
+ MOVD $sincosc3<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincosc1<>+0(SB), R2
+ WFMADB V2, V0, V3, V0
+ FMOVD 0(R2), F6
+ WFMADB V1, V4, V6, V4
+ WORD $0xA7110002 //tmll %r1,2
+ WFMADB V2, V0, V4, V0
+ MOVD $sincosc0<>+0(SB), R1
+ FMOVD 0(R1), F2
+ WFMADB V1, V0, V2, V0
+ BNE L15 // bit 1 of R1 set: L15 negates the result
+ FMOVD F0, ret+8(FP)
+ RET
+
+L6:
+ FMOVD 0(R2), F4 // F4 = sincosxlim
+ WFCHDBS V4, V5, V4 // compare the limit with F5
+ BNE L14 // L14 returns NaN
+ MOVD $sincoss7<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincoss6<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincoss5<>+0(SB), R2
+ WFMADB V1, V4, V3, V4
+ WFMADB V6, V7, V0, V6
+ FMOVD 0(R2), F0
+ MOVD $sincoss4<>+0(SB), R2
+ FMADD F4, F1, F0, F0
+ FMOVD 0(R2), F3
+ MOVD $sincoss2<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincoss3<>+0(SB), R2
+ WFMADB V2, V3, V4, V3
+ FMOVD 0(R2), F4
+ MOVD $sincoss1<>+0(SB), R2
+ WFMADB V2, V0, V4, V0
+ FMOVD 0(R2), F4
+ WFMADB V1, V3, V4, V3
+ FNEG F6, F4
+ WFMADB V2, V0, V3, V2
+ WFMDB V4, V1, V0
+ WORD $0xA7110002 //tmll %r1,2
+ WFMSDB V0, V2, V6, V0
+ BNE L15 // bit 1 of R1 set: L15 negates the result
+ FMOVD F0, ret+8(FP)
+ RET
+
+L14:
+ MOVD $sincosxnan<>+0(SB), R1
+ FMOVD 0(R1), F0 // F0 = NaN constant
+ FMOVD F0, ret+8(FP)
+ RET
+
+L18:
+ WFMDB V0, V0, V2
+ WFMADB V2, V4, V1, V4
+ WFMDB V2, V2, V1
+ WFMADB V2, V4, V18, V4
+ WFMADB V1, V6, V16, V6
+ WFMADB V1, V4, V7, V4
+ WFMADB V2, V6, V3, V6
+ FMUL F0, F2
+ WFMADB V1, V4, V6, V4
+ FMADD F4, F2, F0, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L17:
+ FNEG F0, F5 // F5 = -x
+ BR L2
+L15:
+ FNEG F0, F0 // flip the sign of the result
+ FMOVD F0, ret+8(FP)
+ RET
+
+
+sinIsZero:
+ FMOVD F0, ret+8(FP) // return x unchanged
+ RET
+
+// Cos returns the cosine of the radian argument.
+//
+// Special cases are:
+// Cos(±Inf) = NaN
+// Cos(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+//
+// cosAsm reads x from x+0(FP) and writes its result to ret+8(FP).
+// Instructions given as raw WORD data have their mnemonic in the
+// trailing comment.
+
+TEXT ·cosAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0 // F0 = x
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ BLTU L35 // taken for x < 0 (or unordered): L35 sets F1 = -x
+ FMOVD F0, F1 // otherwise F1 = x
+L21:
+ MOVD $sincosc7<>+0(SB), R1
+ FMOVD 0(R1), F4
+ MOVD $sincosc6<>+0(SB), R1
+ VLEG $0, 0(R1), V20
+ MOVD $sincosc5<>+0(SB), R1
+ VLEG $0, 0(R1), V18
+ MOVD $sincosc4<>+0(SB), R1
+ FMOVD 0(R1), F6
+ MOVD $sincosc2<>+0(SB), R1
+ VLEG $0, 0(R1), V16
+ MOVD $sincosc3<>+0(SB), R1
+ FMOVD 0(R1), F7
+ MOVD $sincosc1<>+0(SB), R1
+ FMOVD 0(R1), F5
+ MOVD $sincosrpi2<>+0(SB), R1
+ FMOVD 0(R1), F2
+ MOVD $sincosxadd<>+0(SB), R1
+ FMOVD 0(R1), F3
+ MOVD $sincoss0<>+0(SB), R1
+ WFMSDB V0, V2, V3, V2
+ FMOVD 0(R1), F3 // F3 = sincoss0 (1.0)
+ WFCHDBS V3, V1, V3 // compare 1.0 with F1
+ WORD $0xB3CD0012 //lgdr %r1,%f2
+ BEQ L36 // L36 evaluates the polynomial directly on x*x
+ MOVD $sincosxadd<>+0(SB), R2
+ FMOVD 0(R2), F4
+ FADD F2, F4
+ MOVD $sincosxpi2h<>+0(SB), R2
+ FMOVD 0(R2), F2
+ WFMSDB V4, V2, V0, V2
+ MOVD $sincosxpi2m<>+0(SB), R2
+ FMOVD 0(R2), F0
+ WFMADB V4, V0, V2, V0
+ MOVD $sincosxpi2l<>+0(SB), R2
+ WFMDB V0, V0, V2
+ FMOVD 0(R2), F5
+ WFMDB V2, V2, V6
+ MOVD $sincosxlim<>+0(SB), R2
+ WORD $0xA7110001 //tmll %r1,1
+ BNE L25 // bit 0 of R1 set: use the sincoss coefficients at L25
+ FMOVD 0(R2), F0 // F0 = sincosxlim
+ WFCHDBS V0, V1, V0 // compare the limit with F1
+ BNE L33 // L33 returns NaN
+ MOVD $sincosc7<>+0(SB), R2
+ FMOVD 0(R2), F0
+ MOVD $sincosc6<>+0(SB), R2
+ FMOVD 0(R2), F4
+ MOVD $sincosc5<>+0(SB), R2
+ WFMADB V2, V0, V4, V0
+ FMOVD 0(R2), F1
+ MOVD $sincosc4<>+0(SB), R2
+ WFMADB V2, V0, V1, V0
+ FMOVD 0(R2), F4
+ MOVD $sincosc2<>+0(SB), R2
+ FMOVD 0(R2), F1
+ WFMADB V6, V4, V1, V4
+ MOVD $sincosc3<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincosc1<>+0(SB), R2
+ WFMADB V6, V0, V3, V0
+ FMOVD 0(R2), F1
+ WFMADB V2, V4, V1, V4
+ WORD $0xA7110002 //tmll %r1,2
+ WFMADB V6, V0, V4, V0
+ MOVD $sincosc0<>+0(SB), R1
+ FMOVD 0(R1), F4
+ WFMADB V2, V0, V4, V0
+ BNE L34 // bit 1 of R1 set: L34 negates the result
+ FMOVD F0, ret+8(FP)
+ RET
+
+L25:
+ FMOVD 0(R2), F3 // F3 = sincosxlim
+ WFCHDBS V3, V1, V1 // compare the limit with F1
+ BNE L33 // L33 returns NaN
+ MOVD $sincoss7<>+0(SB), R2
+ FMOVD 0(R2), F1
+ MOVD $sincoss6<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sincoss5<>+0(SB), R2
+ WFMADB V2, V1, V3, V1
+ FMOVD 0(R2), F3
+ MOVD $sincoss4<>+0(SB), R2
+ WFMADB V2, V1, V3, V1
+ FMOVD 0(R2), F3
+ MOVD $sincoss2<>+0(SB), R2
+ FMOVD 0(R2), F7
+ WFMADB V6, V3, V7, V3
+ MOVD $sincoss3<>+0(SB), R2
+ FMADD F5, F4, F0, F0
+ FMOVD 0(R2), F4
+ MOVD $sincoss1<>+0(SB), R2
+ FMADD F1, F6, F4, F4
+ FMOVD 0(R2), F1
+ FMADD F3, F2, F1, F1
+ FMUL F0, F2
+ WFMADB V6, V4, V1, V6
+ WORD $0xA7110002 //tmll %r1,2
+ FMADD F6, F2, F0, F0
+ BNE L34 // bit 1 of R1 set: L34 negates the result
+ FMOVD F0, ret+8(FP)
+ RET
+
+L33:
+ MOVD $sincosxnan<>+0(SB), R1
+ FMOVD 0(R1), F0 // F0 = NaN constant
+ FMOVD F0, ret+8(FP)
+ RET
+
+L36:
+ FMUL F0, F0 // F0 = x*x
+ MOVD $sincosc0<>+0(SB), R1
+ WFMDB V0, V0, V1
+ WFMADB V0, V4, V20, V4
+ WFMADB V1, V6, V16, V6
+ WFMADB V0, V4, V18, V4
+ WFMADB V0, V6, V5, V6
+ WFMADB V1, V4, V7, V4
+ FMOVD 0(R1), F2
+ WFMADB V1, V4, V6, V4
+ WFMADB V0, V4, V2, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L35:
+ FNEG F0, F1 // F1 = -x
+ BR L21
+L34:
+ FNEG F0, F0 // flip the sign of the result
+ FMOVD F0, ret+8(FP)
+ RET
// Sinh(±0) = ±0
// Sinh(±Inf) = ±Inf
// Sinh(NaN) = NaN
-func Sinh(x float64) float64 {
+func Sinh(x float64) float64
+
+func sinh(x float64) float64 {
// The coefficients are #2029 from Hart & Cheney. (20.36D)
const (
P0 = -0.6307673640497716991184787251e+6
// Cosh(±0) = 1
// Cosh(±Inf) = +Inf
// Cosh(NaN) = NaN
-func Cosh(x float64) float64 {
+func Cosh(x float64) float64
+
+func cosh(x float64) float64 {
if x < 0 {
x = -x
}
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+#include "textflag.h"
+
+// Constants used by sinhAsm.
+DATA sinhrodataL21<>+0(SB)/8, $0.231904681384629956E-16
+DATA sinhrodataL21<>+8(SB)/8, $0.693147180559945286E+00 // numerically close to ln(2)
+DATA sinhrodataL21<>+16(SB)/8, $704.E0 // threshold read by the cdb at displacement 16 in sinhAsm
+GLOBL sinhrodataL21<>+0(SB), RODATA, $24
+DATA sinhrlog2<>+0(SB)/8, $0x3ff7154760000000
+GLOBL sinhrlog2<>+0(SB), RODATA, $8
+DATA sinhxinf<>+0(SB)/8, $0x7ff0000000000000 // bit pattern of +Inf
+GLOBL sinhxinf<>+0(SB), RODATA, $8
+DATA sinhxinit<>+0(SB)/8, $0x3ffb504f333f9de6
+GLOBL sinhxinit<>+0(SB), RODATA, $8
+DATA sinhxlim1<>+0(SB)/8, $800.E0 // second threshold, checked on the large-argument path
+GLOBL sinhxlim1<>+0(SB), RODATA, $8
+DATA sinhxadd<>+0(SB)/8, $0xc3200001610007fb
+GLOBL sinhxadd<>+0(SB), RODATA, $8
+DATA sinhx4ff<>+0(SB)/8, $0x4ff0000000000000 // bit pattern of 2**256
+GLOBL sinhx4ff<>+0(SB), RODATA, $8
+
+// Minimax polynomial approximations
+// (coefficients sinhe0..sinhe9; sinhe0 and sinhe1 hold the same value).
+DATA sinhe0<>+0(SB)/8, $0.11715728752538099300E+01
+GLOBL sinhe0<>+0(SB), RODATA, $8
+DATA sinhe1<>+0(SB)/8, $0.11715728752538099300E+01
+GLOBL sinhe1<>+0(SB), RODATA, $8
+DATA sinhe2<>+0(SB)/8, $0.58578643762688526692E+00
+GLOBL sinhe2<>+0(SB), RODATA, $8
+DATA sinhe3<>+0(SB)/8, $0.19526214587563004497E+00
+GLOBL sinhe3<>+0(SB), RODATA, $8
+DATA sinhe4<>+0(SB)/8, $0.48815536475176217404E-01
+GLOBL sinhe4<>+0(SB), RODATA, $8
+DATA sinhe5<>+0(SB)/8, $0.97631072948627397816E-02
+GLOBL sinhe5<>+0(SB), RODATA, $8
+DATA sinhe6<>+0(SB)/8, $0.16271839297756073153E-02
+GLOBL sinhe6<>+0(SB), RODATA, $8
+DATA sinhe7<>+0(SB)/8, $0.23245485387271142509E-03
+GLOBL sinhe7<>+0(SB), RODATA, $8
+DATA sinhe8<>+0(SB)/8, $0.29080955860869629131E-04
+GLOBL sinhe8<>+0(SB), RODATA, $8
+DATA sinhe9<>+0(SB)/8, $0.32311267157667725278E-05
+GLOBL sinhe9<>+0(SB), RODATA, $8
+
+// Sinh returns the hyperbolic sine of the argument.
+//
+// Special cases are:
+// Sinh(±0) = ±0
+// Sinh(±Inf) = ±Inf
+// Sinh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation
+// with coefficients determined with a Remez exchange algorithm.
+//
+// sinhAsm reads x from x+0(FP) and writes its result to ret+8(FP).
+// Instructions given as raw WORD/BYTE data have their mnemonic in the
+// trailing comment.
+
+TEXT ·sinhAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0 // F0 = x
+ //special case Sinh(±0) = ±0
+ FMOVD $(0.0), F1
+ FCMPU F0, F1
+ BEQ sinhIsZero // x compares equal to 0: return x itself, keeping its sign
+ //special case Sinh(±Inf) = ±Inf
+ // NOTE(review): BLEU presumably also branches on equality, in which case
+ // x == MaxFloat64 is returned unchanged instead of +Inf - confirm.
+ FMOVD $1.797693134862315708145274237317043567981e+308, F1
+ FCMPU F1, F0
+ BLEU sinhIsInf // sinhIsInf returns x unchanged
+ FMOVD $-1.797693134862315708145274237317043567981e+308, F1
+ FCMPU F1, F0
+ BGT sinhIsInf // sinhIsInf returns x unchanged
+
+ MOVD $sinhrodataL21<>+0(SB), R5 // R5 = base of sinhrodataL21 for the cdb below
+ WORD $0xB3120000 //ltdbr %f0,%f0
+ MOVD sinhxinit<>+0(SB), R1
+ FMOVD F0, F4 // F4 keeps the original x
+ MOVD R1, R3
+ BLTU L19 // taken for x < 0 (or unordered): L19 sets F2 = -x
+ FMOVD F0, F2 // otherwise F2 = x
+L2:
+ WORD $0xED205010 //cdb %f2,.L22-.L21(%r5) - displacement 0x010: compares F2 with sinhrodataL21+16 (704.0)
+ BYTE $0x00
+ BYTE $0x19
+ BGE L15 //jnl .L15
+ BVS L15
+ WFCEDBS V2, V2, V0 // compare F2 with itself
+ BEQ L20 // equal: take the main evaluation path at L20
+L12:
+ FMOVD F4, F0 // return the original x
+ FMOVD F0, ret+8(FP)
+ RET
+
+L15:
+ WFCEDBS V2, V2, V0 // compare F2 with itself
+ BVS L12
+ MOVD $sinhxlim1<>+0(SB), R2
+ FMOVD 0(R2), F0 // F0 = 800.0
+ WFCHDBS V0, V2, V0 // compare 800.0 with F2
+ BEQ L6 // L6 evaluates the scaled large-argument path
+ WFCHEDBS V4, V2, V6
+ MOVD $sinhxinf<>+0(SB), R1
+ FMOVD 0(R1), F0 // F0 = +Inf
+ BNE LEXITTAGsinh // return F0
+ WFCHDBS V2, V4, V2
+ BNE L16 // L16 returns F0
+ FNEG F0, F0 // F0 = -Inf
+ FMOVD F0, ret+8(FP)
+ RET
+
+L19:
+ FNEG F0, F2 // F2 = -x
+ BR L2
+L6:
+ MOVD $sinhxadd<>+0(SB), R2
+ FMOVD 0(R2), F0
+ MOVD sinhrlog2<>+0(SB), R2
+ WORD $0xB3C10062 //ldgr %f6,%r2
+ WFMSDB V4, V6, V0, V16
+ FMOVD sinhrodataL21<>+8(SB), F6
+ WFADB V0, V16, V0
+ FMOVD sinhrodataL21<>+0(SB), F3
+ WFMSDB V0, V6, V4, V6
+ MOVD $sinhe9<>+0(SB), R2
+ WFMADB V0, V3, V6, V0
+ FMOVD 0(R2), F1
+ MOVD $sinhe7<>+0(SB), R2
+ WFMDB V0, V0, V6
+ FMOVD 0(R2), F5
+ MOVD $sinhe8<>+0(SB), R2
+ FMOVD 0(R2), F3
+ MOVD $sinhe6<>+0(SB), R2
+ WFMADB V6, V1, V5, V1
+ FMOVD 0(R2), F5
+ MOVD $sinhe5<>+0(SB), R2
+ FMOVD 0(R2), F7
+ MOVD $sinhe3<>+0(SB), R2
+ WFMADB V6, V3, V5, V3
+ FMOVD 0(R2), F5
+ MOVD $sinhe4<>+0(SB), R2
+ WFMADB V6, V7, V5, V7
+ FMOVD 0(R2), F5
+ MOVD $sinhe2<>+0(SB), R2
+ VLEG $0, 0(R2), V20
+ WFMDB V6, V6, V18
+ WFMADB V6, V5, V20, V5
+ WFMADB V1, V18, V7, V1
+ FNEG F0, F0
+ WFMADB V3, V18, V5, V3
+ MOVD $sinhe1<>+0(SB), R3
+ WFCEDBS V2, V4, V2 // compare F2 with the original x; decides the BEQ L9 below
+ FMOVD 0(R3), F5
+ MOVD $sinhe0<>+0(SB), R3
+ WFMADB V6, V1, V5, V1
+ FMOVD 0(R3), F5
+ VLGVG $0, V16, R2
+ WFMADB V6, V3, V5, V6
+ RLL $3, R2, R2
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ BEQ L9
+ WFMSDB V0, V1, V6, V0
+ MOVD $sinhx4ff<>+0(SB), R3
+ FNEG F0, F0
+ FMOVD 0(R3), F2
+ FMUL F2, F0
+ ANDW $0xFFFF, R2
+ WORD $0xA53FEFB6 //llill %r3,61366
+ SUBW R2, R3, R2
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10021 //ldgr %f2,%r1
+ FMUL F2, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L20:
+ MOVD $sinhxadd<>+0(SB), R2
+ FMOVD 0(R2), F2
+ MOVD sinhrlog2<>+0(SB), R2
+ WORD $0xB3C10002 //ldgr %f0,%r2
+ WFMSDB V4, V0, V2, V6
+ FMOVD sinhrodataL21<>+8(SB), F0
+ FADD F6, F2
+ MOVD $sinhe9<>+0(SB), R2
+ FMSUB F0, F2, F4, F4
+ FMOVD 0(R2), F1
+ FMOVD sinhrodataL21<>+0(SB), F3
+ MOVD $sinhe7<>+0(SB), R2
+ FMADD F3, F2, F4, F4
+ FMOVD 0(R2), F0
+ MOVD $sinhe8<>+0(SB), R2
+ WFMDB V4, V4, V2
+ FMOVD 0(R2), F3
+ MOVD $sinhe6<>+0(SB), R2
+ FMOVD 0(R2), F5
+ WORD $0xB3CD0026 //lgdr %r2,%f6
+ RLL $3, R2, R2
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WFMADB V2, V1, V0, V1
+ WORD $0xB3C10001 //ldgr %f0,%r1
+ MOVD $sinhe5<>+0(SB), R1
+ WFMADB V2, V3, V5, V3
+ FMOVD 0(R1), F5
+ MOVD $sinhe3<>+0(SB), R1
+ FMOVD 0(R1), F6
+ WFMDB V2, V2, V7
+ WFMADB V2, V5, V6, V5
+ WORD $0xA7487FB6 //lhi %r4,32694
+ FNEG F4, F4
+ ANDW $0xFFFF, R2
+ SUBW R2, R4, R2
+ WORD $0xEC32000F //risbgn %r3,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10063 //ldgr %f6,%r3
+ WFADB V0, V6, V16
+ MOVD $sinhe4<>+0(SB), R1
+ WFMADB V1, V7, V5, V1
+ WFMDB V4, V16, V4
+ FMOVD 0(R1), F5
+ MOVD $sinhe2<>+0(SB), R1
+ VLEG $0, 0(R1), V16
+ MOVD $sinhe1<>+0(SB), R1
+ WFMADB V2, V5, V16, V5
+ VLEG $0, 0(R1), V16
+ WFMADB V3, V7, V5, V3
+ WFMADB V2, V1, V16, V1
+ FSUB F6, F0
+ FMUL F1, F4
+ MOVD $sinhe0<>+0(SB), R1
+ FMOVD 0(R1), F6
+ WFMADB V2, V3, V6, V2
+ WFMADB V0, V2, V4, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L9:
+ WFMADB V0, V1, V6, V0
+ MOVD $sinhx4ff<>+0(SB), R3
+ FMOVD 0(R3), F2
+ FMUL F2, F0
+ WORD $0xA72AF000 //ahi %r2,-4096
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0xB3C10021 //ldgr %f2,%r1
+ FMUL F2, F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L16:
+ FMOVD F0, ret+8(FP) // return F0
+ RET
+
+LEXITTAGsinh:
+sinhIsInf:
+sinhIsZero:
+ FMOVD F0, ret+8(FP) // shared exit: return whatever F0 holds
+ RET
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 amd64 amd64p32 arm
+
+#include "textflag.h"
+
+TEXT ·Sinh(SB),NOSPLIT,$0 // Sinh jumps straight to ·sinh
+ JMP ·sinh(SB)
+
+TEXT ·Cosh(SB),NOSPLIT,$0 // Cosh jumps straight to ·cosh
+ JMP ·cosh(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0 // Tanh jumps straight to ·tanh
+ JMP ·tanh(SB)
+
TEXT ·Exp2(SB),NOSPLIT,$0
B ·exp2(SB)
+TEXT ·Cosh(SB),NOSPLIT,$0 // Cosh branches straight to ·cosh
+ B ·cosh(SB)
+
TEXT ·Expm1(SB),NOSPLIT,$0
B ·expm1(SB)
TEXT ·Sin(SB),NOSPLIT,$0
B ·sin(SB)
+TEXT ·Sinh(SB),NOSPLIT,$0 // Sinh branches straight to ·sinh
+ B ·sinh(SB)
+
TEXT ·Cos(SB),NOSPLIT,$0
B ·cos(SB)
TEXT ·Tan(SB),NOSPLIT,$0
B ·tan(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0 // Tanh branches straight to ·tanh
+ B ·tanh(SB)
TEXT ·Sin(SB),NOSPLIT,$0
JMP ·sin(SB)
+TEXT ·Sinh(SB),NOSPLIT,$0 // Sinh jumps straight to ·sinh
+ JMP ·sinh(SB)
+
TEXT ·Cos(SB),NOSPLIT,$0
JMP ·cos(SB)
+TEXT ·Cosh(SB),NOSPLIT,$0 // Cosh jumps straight to ·cosh
+ JMP ·cosh(SB)
+
TEXT ·Sqrt(SB),NOSPLIT,$0
JMP ·sqrt(SB)
TEXT ·Tan(SB),NOSPLIT,$0
JMP ·tan(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0 // Tanh jumps straight to ·tanh
+ JMP ·tanh(SB)
TEXT ·Sin(SB),NOSPLIT,$0
JMP ·sin(SB)
+TEXT ·Sinh(SB),NOSPLIT,$0 // Sinh jumps straight to ·sinh
+ JMP ·sinh(SB)
+
TEXT ·Cos(SB),NOSPLIT,$0
JMP ·cos(SB)
+TEXT ·Cosh(SB),NOSPLIT,$0 // Cosh jumps straight to ·cosh
+ JMP ·cosh(SB)
+
TEXT ·Tan(SB),NOSPLIT,$0
JMP ·tan(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0 // Tanh jumps straight to ·tanh
+ JMP ·tanh(SB)
+
TEXT ·Sin(SB),NOSPLIT,$0
BR ·sin(SB)
+TEXT ·Sinh(SB),NOSPLIT,$0 // Sinh branches straight to ·sinh
+ BR ·sinh(SB)
+
TEXT ·Cos(SB),NOSPLIT,$0
BR ·cos(SB)
+TEXT ·Cosh(SB),NOSPLIT,$0 // Cosh branches straight to ·cosh
+ BR ·cosh(SB)
+
TEXT ·Tan(SB),NOSPLIT,$0
BR ·tan(SB)
+
+TEXT ·Tanh(SB),NOSPLIT,$0 // Tanh branches straight to ·tanh
+ BR ·tanh(SB)
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "../runtime/textflag.h"
+#include "textflag.h"
TEXT ·Asin(SB),NOSPLIT,$0
BR ·asin(SB)
TEXT ·Ldexp(SB),NOSPLIT,$0
BR ·ldexp(SB)
-TEXT ·Log10(SB),NOSPLIT,$0
- BR ·log10(SB)
-
TEXT ·Log2(SB),NOSPLIT,$0
BR ·log2(SB)
TEXT ·Sincos(SB),NOSPLIT,$0
BR ·sincos(SB)
-TEXT ·Sin(SB),NOSPLIT,$0
- BR ·sin(SB)
+TEXT ·Tan(SB),NOSPLIT,$0 // Tan stub: frameless forwarder to the unexported tan
+ BR ·tan(SB) // branch rather than call, so tan returns straight to Tan's caller
+
+// hasVectorFacility returns 1 only if the STFLE facility list has the tested bit set AND a vector instruction round-trips a byte; otherwise it returns 0.
+TEXT ·hasVectorFacility(SB),NOSPLIT,$24-1
+ MOVD $x-24(SP), R1 // R1 = address of the 24-byte scratch area in this frame
+ XC $24, 0(R1), 0(R1) // clear the storage
+ MOVD $2, R0 // R0 is the number of double words stored -1
+ WORD $0xB2B01000 // STFLE 0(R1)
+ XOR R0, R0 // reset the value of R0
+ MOVBZ z-8(SP), R1 // byte at offset 16 of the stored list (facility bits 128-135)
+ AND $0x40, R1 // keep the second-highest bit of that byte, i.e. facility bit 129
+ BEQ novector // bit clear: facility not installed
+vectorinstalled:
+ // check if the vector instruction has been enabled
+ VLEIB $0, $0xF, V16 // write the immediate 0xF into byte element 0 of V16
+ VLGVB $0, V16, R1 // read that byte element back into R1
+ CMPBNE R1, $0xF, novector // value did not round-trip: treat as no vector support
+ MOVB $1, ret+0(FP) // have vx
+ RET
+novector:
+ MOVB $0, ret+0(FP) // no vx
+ RET
+
+TEXT ·Log10(SB),NOSPLIT,$0 // Log10 entry point: dispatch through the log10vectorfacility slot
+ MOVD log10vectorfacility+0x00(SB),R1 // R1 = code address currently stored in the slot
+ BR (R1) // branch (not call) to that address
+
+TEXT ·log10TrampolineSetup(SB),NOSPLIT, $0 // selector: stores the chosen implementation in log10vectorfacility, then branches to it
+ MOVB ·hasVX(SB), R1 // R1 = package-level hasVX flag
+ CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version
+ MOVD $log10vectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·log10(SB), R2 // R2 = address of the non-vector log10
+ MOVD R2, 0(R1) // patch the slot so later Log10 calls skip this trampoline
+ BR ·log10(SB) // serve the current call
+vectorimpl:
+ MOVD $log10vectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·log10Asm(SB), R2 // R2 = address of log10Asm
+ MOVD R2, 0(R1) // patch the slot so later Log10 calls skip this trampoline
+ BR ·log10Asm(SB) // serve the current call
+
+GLOBL log10vectorfacility+0x00(SB), NOPTR, $8 // 8-byte dispatch slot read by Log10
+DATA log10vectorfacility+0x00(SB)/8, $·log10TrampolineSetup(SB) // initial target is the trampoline itself
+
TEXT ·Cos(SB),NOSPLIT,$0
- BR ·cos(SB)
+ MOVD cosvectorfacility+0x00(SB),R1 // R1 = code address currently stored in the cosvectorfacility slot
+ BR (R1) // branch (not call) to that address
+
+TEXT ·cosTrampolineSetup(SB),NOSPLIT, $0 // selector: stores the chosen implementation in cosvectorfacility, then branches to it
+ MOVB ·hasVX(SB), R1 // R1 = package-level hasVX flag
+ CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version
+ MOVD $cosvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·cos(SB), R2 // R2 = address of the non-vector cos
+ MOVD R2, 0(R1) // patch the slot so later Cos calls skip this trampoline
+ BR ·cos(SB) // serve the current call
+vectorimpl:
+ MOVD $cosvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·cosAsm(SB), R2 // R2 = address of cosAsm
+ MOVD R2, 0(R1) // patch the slot so later Cos calls skip this trampoline
+ BR ·cosAsm(SB) // serve the current call
+
+GLOBL cosvectorfacility+0x00(SB), NOPTR, $8 // 8-byte dispatch slot read by Cos
+DATA cosvectorfacility+0x00(SB)/8, $·cosTrampolineSetup(SB) // initial target is the trampoline itself
+
+
+TEXT ·Cosh(SB),NOSPLIT,$0 // Cosh entry point: dispatch through the coshvectorfacility slot
+ MOVD coshvectorfacility+0x00(SB),R1 // R1 = code address currently stored in the slot
+ BR (R1) // branch (not call) to that address
+
+TEXT ·coshTrampolineSetup(SB),NOSPLIT, $0 // selector: stores the chosen implementation in coshvectorfacility, then branches to it
+ MOVB ·hasVX(SB), R1 // R1 = package-level hasVX flag
+ CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version
+ MOVD $coshvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·cosh(SB), R2 // R2 = address of the non-vector cosh
+ MOVD R2, 0(R1) // patch the slot so later Cosh calls skip this trampoline
+ BR ·cosh(SB) // serve the current call
+vectorimpl:
+ MOVD $coshvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·coshAsm(SB), R2 // R2 = address of coshAsm
+ MOVD R2, 0(R1) // patch the slot so later Cosh calls skip this trampoline
+ BR ·coshAsm(SB) // serve the current call
+
+GLOBL coshvectorfacility+0x00(SB), NOPTR, $8 // 8-byte dispatch slot read by Cosh
+DATA coshvectorfacility+0x00(SB)/8, $·coshTrampolineSetup(SB) // initial target is the trampoline itself
+
+
+TEXT ·Sin(SB),NOSPLIT,$0 // Sin entry point: dispatch through the sinvectorfacility slot
+ MOVD sinvectorfacility+0x00(SB),R1 // R1 = code address currently stored in the slot
+ BR (R1) // branch (not call) to that address
+
+TEXT ·sinTrampolineSetup(SB),NOSPLIT, $0 // selector: stores the chosen implementation in sinvectorfacility, then branches to it
+ MOVB ·hasVX(SB), R1 // R1 = package-level hasVX flag
+ CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version
+ MOVD $sinvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·sin(SB), R2 // R2 = address of the non-vector sin
+ MOVD R2, 0(R1) // patch the slot so later Sin calls skip this trampoline
+ BR ·sin(SB) // serve the current call
+vectorimpl:
+ MOVD $sinvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·sinAsm(SB), R2 // R2 = address of sinAsm
+ MOVD R2, 0(R1) // patch the slot so later Sin calls skip this trampoline
+ BR ·sinAsm(SB) // serve the current call
+
+GLOBL sinvectorfacility+0x00(SB), NOPTR, $8 // 8-byte dispatch slot read by Sin
+DATA sinvectorfacility+0x00(SB)/8, $·sinTrampolineSetup(SB) // initial target is the trampoline itself
+
+
+TEXT ·Sinh(SB),NOSPLIT,$0 // Sinh entry point: dispatch through the sinhvectorfacility slot
+ MOVD sinhvectorfacility+0x00(SB),R1 // R1 = code address currently stored in the slot
+ BR (R1) // branch (not call) to that address
+
+TEXT ·sinhTrampolineSetup(SB),NOSPLIT, $0 // selector: stores the chosen implementation in sinhvectorfacility, then branches to it
+ MOVB ·hasVX(SB), R1 // R1 = package-level hasVX flag
+ CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version
+ MOVD $sinhvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·sinh(SB), R2 // R2 = address of the non-vector sinh
+ MOVD R2, 0(R1) // patch the slot so later Sinh calls skip this trampoline
+ BR ·sinh(SB) // serve the current call
+vectorimpl:
+ MOVD $sinhvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·sinhAsm(SB), R2 // R2 = address of sinhAsm
+ MOVD R2, 0(R1) // patch the slot so later Sinh calls skip this trampoline
+ BR ·sinhAsm(SB) // serve the current call
+
+GLOBL sinhvectorfacility+0x00(SB), NOPTR, $8 // 8-byte dispatch slot read by Sinh
+DATA sinhvectorfacility+0x00(SB)/8, $·sinhTrampolineSetup(SB) // initial target is the trampoline itself
+
+
+
+TEXT ·Tanh(SB),NOSPLIT,$0 // Tanh entry point: dispatch through the tanhvectorfacility slot
+ MOVD tanhvectorfacility+0x00(SB),R1 // R1 = code address currently stored in the slot
+ BR (R1) // branch (not call) to that address
+
+TEXT ·tanhTrampolineSetup(SB),NOSPLIT, $0 // selector: stores the chosen implementation in tanhvectorfacility, then branches to it
+ MOVB ·hasVX(SB), R1 // R1 = package-level hasVX flag
+ CMPBEQ R1, $1, vectorimpl // hasVX == 1: vector facility available, use the Asm version
+ MOVD $tanhvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·tanh(SB), R2 // R2 = address of the non-vector tanh
+ MOVD R2, 0(R1) // patch the slot so later Tanh calls skip this trampoline
+ BR ·tanh(SB) // serve the current call
+vectorimpl:
+ MOVD $tanhvectorfacility+0x00(SB), R1 // R1 = address of the dispatch slot
+ MOVD $·tanhAsm(SB), R2 // R2 = address of tanhAsm
+ MOVD R2, 0(R1) // patch the slot so later Tanh calls skip this trampoline
+ BR ·tanhAsm(SB) // serve the current call
+
+GLOBL tanhvectorfacility+0x00(SB), NOPTR, $8 // 8-byte dispatch slot read by Tanh
+DATA tanhvectorfacility+0x00(SB)/8, $·tanhTrampolineSetup(SB) // initial target is the trampoline itself
+
-TEXT ·Tan(SB),NOSPLIT,$0
- BR ·tan(SB)
// Tanh(±0) = ±0
// Tanh(±Inf) = ±1
// Tanh(NaN) = NaN
-func Tanh(x float64) float64 {
+func Tanh(x float64) float64
+
+func tanh(x float64) float64 {
const MAXLOG = 8.8029691931113054295988e+01 // log(2**127)
z := Abs(x)
switch {
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// Constants and minimax polynomial coefficients read by tanhAsm (directly, or via R5 + displacement in the raw-encoded instructions)
+DATA tanhrodataL18<>+0(SB)/8, $-1.0 // loaded at label L16
+DATA tanhrodataL18<>+8(SB)/8, $-2.0 // loaded at label L16
+DATA tanhrodataL18<>+16(SB)/8, $1.0 // loaded at L9 and L17; also the operand of the raw adb/sdb (displacement 16)
+DATA tanhrodataL18<>+24(SB)/8, $2.0 // loaded in the L3 evaluation
+DATA tanhrodataL18<>+32(SB)/8, $0.20000000000000011868E+01 // loaded in the L3 evaluation
+DATA tanhrodataL18<>+40(SB)/8, $0.13333333333333341256E+01 // loaded in the L3 evaluation
+DATA tanhrodataL18<>+48(SB)/8, $0.26666666663549111502E+00 // loaded in the L3 evaluation
+DATA tanhrodataL18<>+56(SB)/8, $0.66666666658721844678E+00 // loaded in the L3 evaluation
+DATA tanhrodataL18<>+64(SB)/8, $0.88890217768964374821E-01 // loaded in the L3 evaluation
+DATA tanhrodataL18<>+72(SB)/8, $0.25397199429103821138E-01 // loaded in the L3 evaluation
+DATA tanhrodataL18<>+80(SB)/8, $-.346573590279972643E+00 // numerically about -ln(2)/2; first constant loaded at L3
+DATA tanhrodataL18<>+88(SB)/8, $20.E0 // cutoff compared with F1 by the raw cdb (displacement 88)
+GLOBL tanhrodataL18<>+0(SB), RODATA, $96 // 12 doubles, read-only
+
+// Constants loaded individually by tanhAsm
+DATA tanhrlog2<>+0(SB)/8, $0x4007154760000000 // bit pattern of a double close to 2/ln(2) (about 2.8854) with zero low mantissa bits
+GLOBL tanhrlog2<>+0(SB), RODATA, $8
+DATA tanhxadd<>+0(SB)/8, $0xc2f0000100003ff0 // bit pattern of a large negative double; presumably a bias/rounding constant for the table-index extraction — TODO confirm
+GLOBL tanhxadd<>+0(SB), RODATA, $8
+DATA tanhxmone<>+0(SB)/8, $-1.0 // -1.0, returned on the large-magnitude path of tanhAsm
+GLOBL tanhxmone<>+0(SB), RODATA, $8
+DATA tanhxzero<>+0(SB)/8, $0 // all-zero bits (0.0), compared against x on the large-magnitude path
+GLOBL tanhxzero<>+0(SB), RODATA, $8
+
+// Table of 16 doubles; tanhAsm loads one entry into F5 using a byte offset extracted from the scaled argument
+DATA tanhtab<>+0(SB)/8, $0.000000000000000000E+00
+DATA tanhtab<>+8(SB)/8, $-.171540871271399150E-01
+DATA tanhtab<>+16(SB)/8, $-.306597931864376363E-01
+DATA tanhtab<>+24(SB)/8, $-.410200970469965021E-01
+DATA tanhtab<>+32(SB)/8, $-.486343079978231466E-01
+DATA tanhtab<>+40(SB)/8, $-.538226193725835820E-01
+DATA tanhtab<>+48(SB)/8, $-.568439602538111520E-01
+DATA tanhtab<>+56(SB)/8, $-.579091847395528847E-01
+DATA tanhtab<>+64(SB)/8, $-.571909584179366341E-01
+DATA tanhtab<>+72(SB)/8, $-.548312665987204407E-01
+DATA tanhtab<>+80(SB)/8, $-.509471843643441085E-01
+DATA tanhtab<>+88(SB)/8, $-.456353588448863359E-01
+DATA tanhtab<>+96(SB)/8, $-.389755254243262365E-01
+DATA tanhtab<>+104(SB)/8, $-.310332908285244231E-01
+DATA tanhtab<>+112(SB)/8, $-.218623539150173528E-01
+DATA tanhtab<>+120(SB)/8, $-.115062908917949451E-01
+GLOBL tanhtab<>+0(SB), RODATA, $128 // 16 entries x 8 bytes, read-only
+
+// tanhAsm is the vector-instruction implementation of Tanh; it returns the hyperbolic tangent of the argument.
+//
+// Special cases are:
+// Tanh(±0) = ±0
+// Tanh(±Inf) = ±1
+// Tanh(NaN) = NaN
+// The algorithm used is minimax polynomial approximation using a table of
+// polynomial coefficients determined with a Remez exchange algorithm.
+
+TEXT ·tanhAsm(SB),NOSPLIT,$0-16
+ FMOVD x+0(FP), F0 // F0 = x
+ //special case Tanh(±0) = ±0
+ FMOVD $(0.0), F1
+ FCMPU F0, F1
+ BEQ tanhIsZero // x compares equal to 0: return x itself so the sign of zero is kept
+ MOVD $tanhrodataL18<>+0(SB), R5 // R5 = base of tanhrodataL18, used by the raw-encoded cdb/adb/sdb below
+ WORD $0xB3120000 //ltdbr %f0,%f0 (load-and-test x; BLTU below acts on the resulting condition code)
+ MOVD $0x4034000000000000, R1 // bit pattern of 20.0; NOTE(review): the risbgn below rewrites bits 0-15 of R1 — confirm what this seed contributes
+ BLTU L15 // x < 0 or unordered (NaN): L15 sets F1 = -x
+ FMOVD F0, F1 // otherwise F1 = x
+L2:
+ MOVD $tanhxadd<>+0(SB), R2
+ FMOVD 0(R2), F2 // F2 = tanhxadd
+ MOVD tanhrlog2<>+0(SB), R2 // R2 = the tanhrlog2 bit pattern (value load, not an address)
+ WORD $0xB3C10042 //ldgr %f4,%r2
+ WFMSDB V0, V4, V2, V4 // fused multiply-subtract over x, tanhrlog2 and tanhxadd; result in F4
+ MOVD $tanhtab<>+0(SB), R3 // R3 = base of tanhtab
+ WORD $0xB3CD0024 //lgdr %r2,%f4
+ WORD $0xEC4239BC //risbg %r4,%r2,57,128+60,3
+ BYTE $0x03
+ BYTE $0x55 // R4 = byte offset into tanhtab, consumed by the ld below
+ WORD $0xED105058 //cdb %f1,.L19-.L18(%r5)
+ BYTE $0x00
+ BYTE $0x19 // displacement 0x058: compares F1 with tanhrodataL18+88 (20.0)
+ WORD $0xEC12000F //risbgn %r1,%r2,64-64+0,64-64+0+16-1,64-0-16
+ BYTE $0x30
+ BYTE $0x59
+ WORD $0x68543000 //ld %f5,0(%r4,%r3)
+ WORD $0xB3C10061 //ldgr %f6,%r1
+ BLT L3 // F1 < 20.0: evaluate the approximation at L3
+ MOVD $tanhxzero<>+0(SB), R1
+ FMOVD 0(R1), F2 // F2 = 0.0
+ WFCHDBS V0, V2, V4
+ BEQ L9 // L9 returns 1.0; presumably the x > 0 case — confirm compare operand order
+ WFCHDBS V2, V0, V2
+ BNE L1 // L1 returns F0 (x) unchanged; presumably the NaN case
+ MOVD $tanhxmone<>+0(SB), R1
+ FMOVD 0(R1), F0 // remaining case: result is -1.0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L3: // reached when F1 < 20.0: evaluation using the tanhrodataL18 coefficients and the tanhtab entry in F5
+ FADD F4, F2
+ FMOVD tanhrodataL18<>+80(SB), F4
+ FMADD F4, F2, F0, F0
+ FMOVD tanhrodataL18<>+72(SB), F1
+ WFMDB V0, V0, V3
+ FMOVD tanhrodataL18<>+64(SB), F2
+ WFMADB V0, V1, V2, V1
+ FMOVD tanhrodataL18<>+56(SB), F4
+ FMOVD tanhrodataL18<>+48(SB), F2
+ WFMADB V1, V3, V4, V1
+ FMOVD tanhrodataL18<>+40(SB), F4
+ WFMADB V3, V2, V4, V2
+ FMOVD tanhrodataL18<>+32(SB), F4
+ WORD $0xB9270022 //lhr %r2,%r2
+ WFMADB V3, V1, V4, V1
+ FMOVD tanhrodataL18<>+24(SB), F4
+ WFMADB V3, V2, V4, V3
+ WFMADB V0, V5, V0, V2
+ WFMADB V0, V1, V3, V0
+ WORD $0xA7183ECF //lhi %r1,16079
+ WFMADB V0, V2, V5, V2
+ FMUL F6, F2
+ MOVW R2, R10
+ MOVW R1, R11
+ CMPBLE R10, R11, L16 // R2 <= 16079 (0x3ECF): finish with the L16 formula
+ FMOVD F6, F0
+ WORD $0xED005010 //adb %f0,.L28-.L18(%r5)
+ BYTE $0x00
+ BYTE $0x1A // displacement 0x010: adds tanhrodataL18+16 (1.0) to F0
+ WORD $0xA7184330 //lhi %r1,17200
+ FADD F2, F0
+ MOVW R2, R10
+ MOVW R1, R11
+ CMPBGT R10, R11, L17 // R2 > 17200 (0x4330): finish with the L17 formula
+ WORD $0xED605010 //sdb %f6,.L28-.L18(%r5)
+ BYTE $0x00
+ BYTE $0x1B // displacement 0x010: subtracts tanhrodataL18+16 (1.0) from F6
+ FADD F6, F2
+ WFDDB V0, V2, V0 // final quotient of F0 and F2, left in F0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L9:
+ FMOVD tanhrodataL18<>+16(SB), F0 // F0 = 1.0
+L1:
+ FMOVD F0, ret+8(FP) // return whatever is in F0 (1.0 via L9, or x when branched here directly)
+ RET
+
+L15:
+ FNEG F0, F1 // F1 = -x, then rejoin the common path
+ BR L2
+L16: // alternative finish selected when R2 <= 16079
+ FADD F6, F2
+ FMOVD tanhrodataL18<>+8(SB), F0 // F0 = -2.0
+ FMADD F4, F2, F0, F0
+ FMOVD tanhrodataL18<>+0(SB), F4 // F4 = -1.0
+ FNEG F0, F0
+ WFMADB V0, V2, V4, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+L17: // alternative finish selected when R2 > 17200
+ WFDDB V0, V4, V0
+ FMOVD tanhrodataL18<>+16(SB), F2 // F2 = 1.0
+ WFSDB V0, V2, V0
+ FMOVD F0, ret+8(FP)
+ RET
+
+tanhIsZero: //return ±0
+ FMOVD F0, ret+8(FP)
+ RET