{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
+ {AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
+ {AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
+ {AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
+ {AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
#define Big 0x4330000000000000 // 2**52
+// func hasSSE4() bool
+// hasSSE4 returns whether SSE4.1 is supported.
+// It executes CPUID with AX = 1 and returns bit 19 of the resulting CX
+// value as the bool result.
+TEXT ·hasSSE4(SB),NOSPLIT,$0
+ XORQ AX, AX // AX = 0
+ INCL AX // AX = 1, the CPUID function number to query
+ CPUID
+ SHRQ $19, CX // move bit 19 of CX down to bit 0
+ ANDQ $1, CX // keep only that bit, so CX is 0 or 1
+ MOVB CX, ret+0(FP) // store the low byte as the bool result
+ RET
+
// func Floor(x float64) float64
TEXT ·Floor(SB),NOSPLIT,$0
+ CMPB math·useSSE4(SB), $1 // useSSE4 is set from hasSSE4() at package initialization
+ JNE nosse4 // not set: take the non-SSE4 path at nosse4
+ ROUNDSD $1, x+0(FP), X0 // immediate 1 selects rounding toward negative infinity
+ MOVQ X0, ret+8(FP) // return the rounded value directly
+ RET
+nosse4:
MOVQ x+0(FP), AX
MOVQ $~(1<<63), DX // sign bit mask
ANDQ AX,DX // DX = |x|
// func Ceil(x float64) float64
TEXT ·Ceil(SB),NOSPLIT,$0
+ CMPB math·useSSE4(SB), $1 // useSSE4 is set from hasSSE4() at package initialization
+ JNE nosse4 // not set: take the non-SSE4 path at nosse4
+ ROUNDSD $2, x+0(FP), X0 // immediate 2 selects rounding toward positive infinity
+ MOVQ X0, ret+8(FP) // return the rounded value directly
+ RET
+nosse4:
MOVQ x+0(FP), AX
MOVQ $~(1<<63), DX // sign bit mask
MOVQ AX, BX // BX = copy of x
--- /dev/null
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32
+
+package math
+
+// hasSSE4 reports whether the CPU supports SSE4.1.
+// It has no Go body; it is defined in floor_amd64.s.
+func hasSSE4() bool
+
+// useSSE4 holds the result of hasSSE4, evaluated once when the package
+// is initialized. The assembly implementations of Floor and Ceil compare
+// it against 1 to decide whether to use the ROUNDSD instruction.
+var useSSE4 = hasSSE4()