From: Charles L. Dorian Date: Sat, 2 Jun 2012 17:06:12 +0000 (-0400) Subject: math: amd64 versions of Ceil, Floor and Trunc X-Git-Tag: go1.1rc2~3027 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=322057cbfce3c9c295aef4b87d1bf689f75c345f;p=gostls13.git math: amd64 versions of Ceil, Floor and Trunc Ceil to 4.81 from 20.6 ns/op Floor to 4.37 from 13.5 ns/op Trunc to 3.97 from 14.3 ns/op Also changed three MOVSDs to MOVAPDs in log_amd64.s R=rsc, golang-dev CC=golang-dev https://golang.org/cl/6262048 --- diff --git a/src/pkg/math/floor_amd64.s b/src/pkg/math/floor_amd64.s index 9fc49a56fd..e72cc3cf9c 100644 --- a/src/pkg/math/floor_amd64.s +++ b/src/pkg/math/floor_amd64.s @@ -1,12 +1,74 @@ -// Copyright 2011 The Go Authors. All rights reserved. +// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#define Big 0x4330000000000000 // 2**52 + +// func Floor(x float64) float64 TEXT ·Floor(SB),7,$0 - JMP ·floor(SB) + MOVQ x+0(FP), AX + MOVQ $~(1<<63), DX // sign bit mask + ANDQ AX,DX // DX = |x| + SUBQ $1,DX + MOVQ $(Big - 1), CX // if |x| >= 2**52-1 or IsNaN(x) or |x| == 0, return x + CMPQ DX,CX + JAE isBig_floor + MOVQ AX, X0 // X0 = x + CVTTSD2SQ X0, AX + CVTSQ2SD AX, X1 // X1 = float(int(x)) + CMPSD X1, X0, 1 // compare LT; X0 = 0xffffffffffffffff or 0 + MOVSD $(-1.0), X2 + ANDPD X2, X0 // if x < float(int(x)) {X0 = -1} else {X0 = 0} + ADDSD X1, X0 + MOVSD X0, r+8(FP) + RET +isBig_floor: + MOVQ AX, r+8(FP) // return x + RET +// func Ceil(x float64) float64 TEXT ·Ceil(SB),7,$0 - JMP ·ceil(SB) + MOVQ x+0(FP), AX + MOVQ $~(1<<63), DX // sign bit mask + MOVQ AX, BX // BX = copy of x + ANDQ DX, BX // BX = |x| + MOVQ $Big, CX // if |x| >= 2**52 or IsNaN(x), return x + CMPQ BX, CX + JAE isBig_ceil + MOVQ AX, X0 // X0 = x + MOVQ DX, X2 // X2 = sign bit mask + CVTTSD2SQ X0, AX + ANDNPD X0, X2 // X2 = sign + CVTSQ2SD AX, X1 // X1 = float(int(x)) + CMPSD X1, X0, 2 // compare LE; X0 = 0xffffffffffffffff or 0 + ORPD X2, X1 // if X1 = 0.0, incorporate sign + MOVSD $1.0, X3 + ANDNPD X3, X0 + ORPD X2, X0 // if float(int(x)) <= x {X0 = 1} else {X0 = -0} + ADDSD X1, X0 + MOVSD X0, r+8(FP) + RET +isBig_ceil: + MOVQ AX, r+8(FP) + RET +// func Trunc(x float64) float64 TEXT ·Trunc(SB),7,$0 - JMP ·trunc(SB) + MOVQ x+0(FP), AX + MOVQ $~(1<<63), DX // sign bit mask + MOVQ AX, BX // BX = copy of x + ANDQ DX, BX // BX = |x| + MOVQ $Big, CX // if |x| >= 2**52 or IsNaN(x), return x + CMPQ BX, CX + JAE isBig_trunc + MOVQ AX, X0 + MOVQ DX, X2 // X2 = sign bit mask + CVTTSD2SQ X0, AX + ANDNPD X0, X2 // X2 = sign + CVTSQ2SD AX, X0 // X0 = float(int(x)) + ORPD X2, X0 // if X0 = 0.0, incorporate sign + MOVSD X0, r+8(FP) + RET +isBig_trunc: + MOVQ AX, r+8(FP) // return x + RET diff --git a/src/pkg/math/log_amd64.s b/src/pkg/math/log_amd64.s index 79e35907c5..bf989bd2e8 100644 --- a/src/pkg/math/log_amd64.s +++ b/src/pkg/math/log_amd64.s @@ -54,13 +54,13 @@ TEXT ·Log(SB),7,$0 // s := f / (2 + f) MOVSD $2.0, X0 ADDSD X2, X0 - MOVSD X2, X3 + MOVAPD X2, X3 DIVSD X0, X3 // x1=k, x2= f, x3= s // s2 := s * s - MOVSD X3, X4 // x1= k, x2= f, x3= s + MOVAPD X3, X4 // x1= k, x2= f, x3= s MULSD X4, X4 // x1= k, x2= f, x3= s, x4= s2 // s4 := s2 * s2 - MOVSD X4, X5 // x1= k, x2= f, x3= s, x4= s2 + MOVAPD X4, X5 // x1= k, x2= f, x3= s, x4= s2 MULSD X5, X5 // x1= k, x2= f, x3= s, x4= s2, x5= s4 // t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) MOVSD $L7, X6