Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: emit fused multiply-{add,subtract} instructions on s390x
author Michael Munday <munday@ca.ibm.com>
Mon, 13 Feb 2017 03:12:12 +0000 (22:12 -0500)
committer Michael Munday <munday@ca.ibm.com>
Tue, 28 Feb 2017 15:34:20 +0000 (15:34 +0000)
Explicitly block fused multiply-add pattern matching when a cast is used
after the multiplication, for example:

    - (a * b) + c        // can emit fused multiply-add
    - float64(a * b) + c // cannot emit fused multiply-add

float{32,64} and complex{64,128} casts of matching types are now kept
as OCONV operations rather than being replaced with OCONVNOP operations
because they now imply a rounding operation (and therefore aren't a
no-op anymore).

Operations (for example, multiplication) on complex types may utilize
fused multiply-add and -subtract instructions internally. There is no
way to disable this behavior at the moment.

Improves the performance of the floating point implementation of
poly1305:

name         old speed     new speed     delta
64           246MB/s ± 0%  275MB/s ± 0%  +11.48%   (p=0.000 n=10+8)
1K           312MB/s ± 0%  357MB/s ± 0%  +14.41%  (p=0.000 n=10+10)
64Unaligned  246MB/s ± 0%  274MB/s ± 0%  +11.43%  (p=0.000 n=10+10)
1KUnaligned  312MB/s ± 0%  357MB/s ± 0%  +14.39%   (p=0.000 n=10+8)

Updates #17895.

Change-Id: Ia771d275bb9150d1a598f8cc773444663de5ce16
Reviewed-on: https://go-review.googlesource.com/36963
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
28 files changed:
src/cmd/compile/internal/gc/asm_test.go
src/cmd/compile/internal/gc/const.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/gc/testdata/fp.go
src/cmd/compile/internal/gc/typecheck.go
src/cmd/compile/internal/s390x/prog.go
src/cmd/compile/internal/s390x/ssa.go
src/cmd/compile/internal/ssa/gen/386.rules
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/ARM.rules
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/MIPS.rules
src/cmd/compile/internal/ssa/gen/MIPS64.rules
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/S390X.rules
src/cmd/compile/internal/ssa/gen/S390XOps.go
src/cmd/compile/internal/ssa/gen/generic.rules
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite386.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssa/rewriteARM.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/compile/internal/ssa/rewriteMIPS.go
src/cmd/compile/internal/ssa/rewriteMIPS64.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/compile/internal/ssa/rewriteS390X.go
src/cmd/compile/internal/ssa/rewritegeneric.go

index 76c4f640b928af5c5eedc86af33c7c2d3edd062d..4525ba3f0cebc9f4d438daa5257d1c31ad82f70f 100644 (file)
@@ -638,6 +638,39 @@ var linuxS390XTests = []*asmTest{
                `,
                []string{"\tRLL\t[$]7,"},
        },
+       // Fused multiply-add/sub instructions.
+       {
+               `
+               func f14(x, y, z float64) float64 {
+                       return x * y + z
+               }
+               `,
+               []string{"\tFMADD\t"},
+       },
+       {
+               `
+               func f15(x, y, z float64) float64 {
+                       return x * y - z
+               }
+               `,
+               []string{"\tFMSUB\t"},
+       },
+       {
+               `
+               func f16(x, y, z float32) float32 {
+                       return x * y + z
+               }
+               `,
+               []string{"\tFMADDS\t"},
+       },
+       {
+               `
+               func f17(x, y, z float32) float32 {
+                       return x * y - z
+               }
+               `,
+               []string{"\tFMSUBS\t"},
+       },
 }
 
 var linuxARMTests = []*asmTest{
index 948d46d01f6e420287d78376d8997f2a9bbc020b..42a10a9761aab31bef4f3df82aef33f1dd012ac1 100644 (file)
@@ -719,6 +719,7 @@ func evconst(n *Node) {
                case OCONV_ | CTINT_,
                        OCONV_ | CTRUNE_,
                        OCONV_ | CTFLT_,
+                       OCONV_ | CTCPLX_,
                        OCONV_ | CTSTR_,
                        OCONV_ | CTBOOL_:
                        nl = convlit1(nl, n.Type, true, false)
index 11caacb9e94e055d1be717b5575ec2b8c73a5884..1850cbde132b46300635973a472738c1bbb0929e 100644 (file)
@@ -1287,8 +1287,8 @@ var fpConvOpToSSA = map[twoTypes]twoOpsAndType{
 
        // float
        twoTypes{TFLOAT64, TFLOAT32}: twoOpsAndType{ssa.OpCvt64Fto32F, ssa.OpCopy, TFLOAT32},
-       twoTypes{TFLOAT64, TFLOAT64}: twoOpsAndType{ssa.OpCopy, ssa.OpCopy, TFLOAT64},
-       twoTypes{TFLOAT32, TFLOAT32}: twoOpsAndType{ssa.OpCopy, ssa.OpCopy, TFLOAT32},
+       twoTypes{TFLOAT64, TFLOAT64}: twoOpsAndType{ssa.OpRound64F, ssa.OpCopy, TFLOAT64},
+       twoTypes{TFLOAT32, TFLOAT32}: twoOpsAndType{ssa.OpRound32F, ssa.OpCopy, TFLOAT32},
        twoTypes{TFLOAT32, TFLOAT64}: twoOpsAndType{ssa.OpCvt32Fto64F, ssa.OpCopy, TFLOAT64},
 }
 
@@ -1704,7 +1704,14 @@ func (s *state) expr(n *Node) *ssa.Value {
                if ft.IsComplex() && tt.IsComplex() {
                        var op ssa.Op
                        if ft.Size() == tt.Size() {
-                               op = ssa.OpCopy
+                               switch ft.Size() {
+                               case 8:
+                                       op = ssa.OpRound32F
+                               case 16:
+                                       op = ssa.OpRound64F
+                               default:
+                                       s.Fatalf("weird complex conversion %v -> %v", ft, tt)
+                               }
                        } else if ft.Size() == 8 && tt.Size() == 16 {
                                op = ssa.OpCvt32Fto64F
                        } else if ft.Size() == 16 && tt.Size() == 8 {
index 91656bef308d1ce2ace5ecb65affda90214972e2..18082c5634910678a58b75e9f80f5f43e88f5e01 100644 (file)
@@ -232,6 +232,141 @@ func integer2floatConversions() int {
        return fails
 }
 
+func multiplyAdd() int {
+       fails := 0
+       {
+               // Test that a multiply-accumulate operation with intermediate
+               // rounding forced by a float32() cast produces the expected
+               // result.
+               // Test cases generated experimentally on a system (s390x) that
+               // supports fused multiply-add instructions.
+               var tests = [...]struct{ x, y, z, res float32 }{
+                       {0.6046603, 0.9405091, 0.6645601, 1.2332485},      // fused multiply-add result: 1.2332486
+                       {0.67908466, 0.21855305, 0.20318687, 0.3516029},   // fused multiply-add result: 0.35160288
+                       {0.29311424, 0.29708257, 0.752573, 0.8396522},     // fused multiply-add result: 0.8396521
+                       {0.5305857, 0.2535405, 0.282081, 0.41660595},      // fused multiply-add result: 0.41660598
+                       {0.29711226, 0.89436173, 0.097454615, 0.36318043}, // fused multiply-add result: 0.36318046
+                       {0.6810783, 0.24151509, 0.31152245, 0.47601312},   // fused multiply-add result: 0.47601315
+                       {0.73023146, 0.18292491, 0.4283571, 0.5619346},    // fused multiply-add result: 0.56193465
+                       {0.89634174, 0.32208398, 0.7211478, 1.009845},     // fused multiply-add result: 1.0098451
+                       {0.6280982, 0.12675293, 0.2813303, 0.36094356},    // fused multiply-add result: 0.3609436
+                       {0.29400632, 0.75316125, 0.15096405, 0.3723982},   // fused multiply-add result: 0.37239823
+               }
+               check := func(s string, got, expected float32) int {
+                       if got != expected {
+                               fmt.Printf("multiplyAdd: %s, expected %g, got %g\n", s, expected, got)
+                               return 1
+                       }
+                       return 0
+               }
+               for _, t := range tests {
+                       fails += check(
+                               fmt.Sprintf("float32(%v * %v) + %v", t.x, t.y, t.z),
+                               func(x, y, z float32) float32 {
+                                       return float32(x*y) + z
+                               }(t.x, t.y, t.z),
+                               t.res)
+
+                       fails += check(
+                               fmt.Sprintf("%v += float32(%v * %v)", t.z, t.x, t.y),
+                               func(x, y, z float32) float32 {
+                                       z += float32(x * y)
+                                       return z
+                               }(t.x, t.y, t.z),
+                               t.res)
+               }
+       }
+       {
+               // Test that a multiply-accumulate operation with intermediate
+               // rounding forced by a float64() cast produces the expected
+               // result.
+               // Test cases generated experimentally on a system (s390x) that
+               // supports fused multiply-add instructions.
+               var tests = [...]struct{ x, y, z, res float64 }{
+                       {0.4688898449024232, 0.28303415118044517, 0.29310185733681576, 0.42581369658590373}, // fused multiply-add result: 0.4258136965859037
+                       {0.7886049150193449, 0.3618054804803169, 0.8805431227416171, 1.1658647029293308},    // fused multiply-add result: 1.1658647029293305
+                       {0.7302314772948083, 0.18292491645390843, 0.4283570818068078, 0.5619346137829748},   // fused multiply-add result: 0.5619346137829747
+                       {0.6908388315056789, 0.7109071952999951, 0.5637795958152644, 1.0549018919252924},    // fused multiply-add result: 1.0549018919252926
+                       {0.4584424785756506, 0.6001655953233308, 0.02626515060968944, 0.3014065536855481},   // fused multiply-add result: 0.30140655368554814
+                       {0.539210105890946, 0.9756748149873165, 0.7507630564795985, 1.2768567767840384},     // fused multiply-add result: 1.2768567767840386
+                       {0.7830349733960021, 0.3932509992288867, 0.1304138461737918, 0.4383431318929343},    // fused multiply-add result: 0.43834313189293433
+                       {0.6841751300974551, 0.6530402051353608, 0.524499759549865, 0.9712936268572192},     // fused multiply-add result: 0.9712936268572193
+                       {0.3691117091643448, 0.826454125634742, 0.34768170859156955, 0.6527356034505334},    // fused multiply-add result: 0.6527356034505333
+                       {0.16867966833433606, 0.33136826030698385, 0.8279280961505588, 0.8838231843956668},  // fused multiply-add result: 0.8838231843956669
+               }
+               check := func(s string, got, expected float64) int {
+                       if got != expected {
+                               fmt.Printf("multiplyAdd: %s, expected %g, got %g\n", s, expected, got)
+                               return 1
+                       }
+                       return 0
+               }
+               for _, t := range tests {
+                       fails += check(
+                               fmt.Sprintf("float64(%v * %v) + %v", t.x, t.y, t.z),
+                               func(x, y, z float64) float64 {
+                                       return float64(x*y) + z
+                               }(t.x, t.y, t.z),
+                               t.res)
+
+                       fails += check(
+                               fmt.Sprintf("%v += float64(%v * %v)", t.z, t.x, t.y),
+                               func(x, y, z float64) float64 {
+                                       z += float64(x * y)
+                                       return z
+                               }(t.x, t.y, t.z),
+                               t.res)
+               }
+       }
+       {
+               // Test that a multiply-accumulate operation with intermediate
+               // rounding forced by a complex128() cast produces the expected
+               // result.
+               // Test cases generated experimentally on a system (s390x) that
+               // supports fused multiply-add instructions.
+               var tests = [...]struct {
+                       x, y float64
+                       res  complex128
+               }{
+                       {0.6046602879796196, 0.9405090880450124, (2.754489951983871 + 3i)},    // fused multiply-add result: (2.7544899519838713 + 3i)
+                       {0.09696951891448456, 0.30091186058528707, (0.5918204173287407 + 3i)}, // fused multiply-add result: (0.5918204173287408 + 3i)
+                       {0.544155573000885, 0.27850762181610883, (1.910974340818764 + 3i)},    // fused multiply-add result: (1.9109743408187638 + 3i)
+                       {0.9769168685862624, 0.07429099894984302, (3.0050416047086297 + 3i)},  // fused multiply-add result: (3.00504160470863 + 3i)
+                       {0.9269868035744142, 0.9549454404167818, (3.735905851140024 + 3i)},    // fused multiply-add result: (3.7359058511400245 + 3i)
+                       {0.7109071952999951, 0.5637795958152644, (2.69650118171525 + 3i)},     // fused multiply-add result: (2.6965011817152496 + 3i)
+                       {0.7558235074915978, 0.40380328579570035, (2.671273808270494 + 3i)},   // fused multiply-add result: (2.6712738082704934 + 3i)
+                       {0.13065111702897217, 0.9859647293402467, (1.3779180804271633 + 3i)},  // fused multiply-add result: (1.3779180804271631 + 3i)
+                       {0.8963417453962161, 0.3220839705208817, (3.0111092067095298 + 3i)},   // fused multiply-add result: (3.01110920670953 + 3i)
+                       {0.39998376285699544, 0.497868113342702, (1.697819401913688 + 3i)},    // fused multiply-add result: (1.6978194019136883 + 3i)
+               }
+               check := func(s string, got, expected complex128) int {
+                       if got != expected {
+                               fmt.Printf("multiplyAdd: %s, expected %v, got %v\n", s, expected, got)
+                               return 1
+                       }
+                       return 0
+               }
+               for _, t := range tests {
+                       fails += check(
+                               fmt.Sprintf("complex128(complex(%v, 1)*3) + complex(%v, 0)", t.x, t.y),
+                               func(x, y float64) complex128 {
+                                       return complex128(complex(x, 1)*3) + complex(y, 0)
+                               }(t.x, t.y),
+                               t.res)
+
+                       fails += check(
+                               fmt.Sprintf("z := complex(%v, 1); z += complex128(complex(%v, 1) * 3)", t.y, t.x),
+                               func(x, y float64) complex128 {
+                                       z := complex(y, 0)
+                                       z += complex128(complex(x, 1) * 3)
+                                       return z
+                               }(t.x, t.y),
+                               t.res)
+               }
+       }
+       return fails
+}
+
 const (
        aa = 0x1000000000000000
        ab = 0x100000000000000
@@ -1658,6 +1793,8 @@ func main() {
 
        fails += integer2floatConversions()
 
+       fails += multiplyAdd()
+
        var zero64 float64 = 0.0
        var one64 float64 = 1.0
        var inf64 float64 = 1.0 / zero64
index ba52b7120044cc13b3ecc41d46cefa2c6b706e63..1aca44bce5cce80537723b8c9425538fc9581640 100644 (file)
@@ -1716,6 +1716,13 @@ OpSwitch:
                                *r = *n
                                n.Op = OLITERAL
                                n.SetVal(n.Left.Val())
+                       } else if t.Etype == n.Type.Etype {
+                               switch t.Etype {
+                               case TFLOAT32, TFLOAT64, TCOMPLEX64, TCOMPLEX128:
+                                       // Floating point casts imply rounding and
+                                       // so the conversion must be kept.
+                                       n.Op = OCONV
+                               }
                        }
 
                // do not use stringtoarraylit.
index fbe9291a2416d61b67014cd06c4f674e49827134..07c1a45cd5a5924928bf98c8e8e7d38164fbebd9 100644 (file)
@@ -72,21 +72,25 @@ var progtable = [s390x.ALAST & obj.AMask]gc.ProgInfo{
        s390x.AFLOGR & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite},
 
        // Floating point.
-       s390x.AFADD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
-       s390x.AFADDS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
-       s390x.AFSUB & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
-       s390x.AFSUBS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
-       s390x.AFMUL & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
-       s390x.AFMULS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
-       s390x.AFDIV & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
-       s390x.AFDIVS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
-       s390x.AFCMPU & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightRead},
-       s390x.ACEBR & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RightRead},
-       s390x.ALEDBR & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv},
-       s390x.ALDEBR & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv},
-       s390x.AFSQRT & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite},
-       s390x.AFNEG & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite},
-       s390x.AFNEGS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite},
+       s390x.AFADD & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       s390x.AFADDS & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       s390x.AFSUB & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       s390x.AFSUBS & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       s390x.AFMUL & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       s390x.AFMULS & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       s390x.AFDIV & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       s390x.AFDIVS & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       s390x.AFCMPU & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightRead},
+       s390x.ACEBR & obj.AMask:   {Flags: gc.SizeF | gc.LeftRead | gc.RightRead},
+       s390x.ALEDBR & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv},
+       s390x.ALDEBR & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv},
+       s390x.AFSQRT & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite},
+       s390x.AFNEG & obj.AMask:   {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite},
+       s390x.AFNEGS & obj.AMask:  {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite},
+       s390x.AFMADD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightRead | gc.RightWrite},
+       s390x.AFMADDS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightRead | gc.RightWrite},
+       s390x.AFMSUB & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightRead | gc.RightWrite},
+       s390x.AFMSUBS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightRead | gc.RightWrite},
 
        // Conversions
        s390x.ACEFBRA & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Conv},
index f057e7315dea5f70ce24d5a2a33fb66f7141445d..b349f012953b21020db6080ebee2c4d84219e002 100644 (file)
@@ -193,6 +193,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                        v.Fatalf("input[0] and output not in same register %s", v.LongString())
                }
                opregreg(v.Op.Asm(), r, v.Args[1].Reg())
+       case ssa.OpS390XFMADD, ssa.OpS390XFMADDS,
+               ssa.OpS390XFMSUB, ssa.OpS390XFMSUBS:
+               r := v.Reg()
+               if r != v.Args[0].Reg() {
+                       v.Fatalf("input[0] and output not in same register %s", v.LongString())
+               }
+               r1 := v.Args[1].Reg()
+               r2 := v.Args[2].Reg()
+               p := gc.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r1
+               p.Reg = r2
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = r
        case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
                ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
                ssa.OpS390XMODD, ssa.OpS390XMODW,
@@ -465,6 +479,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpS390XLoweredGetClosurePtr:
                // Closure pointer is R12 (already)
                gc.CheckLoweredGetClosurePtr(v)
+       case ssa.OpS390XLoweredRound32F, ssa.OpS390XLoweredRound64F:
+               // input is already rounded
        case ssa.OpS390XLoweredGetG:
                r := v.Reg()
                p := gc.Prog(s390x.AMOVD)
index c3503860d8a41b11ca545e67043d5c9883acc7a4..3640857b581828283757e8c5ab9696175480ceca 100644 (file)
 (Cvt32Fto64F x) -> (CVTSS2SD x)
 (Cvt64Fto32F x) -> (CVTSD2SS x)
 
+(Round32F x) -> x
+(Round64F x) -> x
+
 // Lowering shifts
 // Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
 //   result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
index a480b3e7bc695009747d50dd408df8058fede946..622f87ef36b771469818b269db6e48c67b2ae842 100644 (file)
 (Cvt32Fto64F x) -> (CVTSS2SD x)
 (Cvt64Fto32F x) -> (CVTSD2SS x)
 
+(Round32F x) -> x
+(Round64F x) -> x
+
 // Lowering shifts
 // Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
 //   result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
index 7f5bc9e510215f20eef34409204788064ad3e110..fc17573ee3a4caccac5951e29ace0abf82ef927a 100644 (file)
 (Cvt32Fto64F x) -> (MOVFD x)
 (Cvt64Fto32F x) -> (MOVDF x)
 
+(Round32F x) -> x
+(Round64F x) -> x
+
 // comparisons
 (Eq8 x y)  -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Eq16 x y) -> (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
index cd9bfc0b74834f463e5aa6013c748849684dbbc0..94d5902abda40e717943b96952ee7d53a431a570 100644 (file)
 (Cvt32Fto64F x) -> (FCVTSD x)
 (Cvt64Fto32F x) -> (FCVTDS x)
 
+(Round32F x) -> x
+(Round64F x) -> x
+
 // comparisons
 (Eq8 x y)  -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Eq16 x y) -> (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
index 1baa0028e0ac5e493c20a4c1f16d81fe3f605eb0..42aef79f564205d4c09480b6a5f17803b5538aaa 100644 (file)
 (Cvt32Fto64F x) -> (MOVFD x)
 (Cvt64Fto32F x) -> (MOVDF x)
 
+(Round32F x) -> x
+(Round64F x) -> x
+
 // comparisons
 (Eq8 x y)  -> (SGTUconst [1] (XOR (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Eq16 x y) -> (SGTUconst [1] (XOR (ZeroExt16to32 x) (ZeroExt16to32 y)))
index 47487bff36eff3333b8158d17ba232838c049d7d..720f91d7474c1533e21ee0e5a04db89e971d26ef 100644 (file)
 (Cvt32Fto64F x) -> (MOVFD x)
 (Cvt64Fto32F x) -> (MOVDF x)
 
+(Round32F x) -> x
+(Round64F x) -> x
+
 // comparisons
 (Eq8 x y)  -> (SGTU (MOVVconst [1]) (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)))
 (Eq16 x y) -> (SGTU (MOVVconst [1]) (XOR (ZeroExt16to64 x) (ZeroExt16to64 y)))
index 56605dc1a06f60831da54b102e811a9a768437bd..f44bb34f9ec07b6b5a2647a97b9a23df65e8ebab 100644 (file)
@@ -74,6 +74,9 @@
 (Cvt32Fto64F x) -> x // Note x will have the wrong type for patterns dependent on Float32/Float64
 (Cvt64Fto32F x) -> (FRSP x)
 
+(Round32F x) -> x
+(Round64F x) -> x
+
 (Sqrt x) -> (FSQRT x)
 
 // Lowering constants
index 0b5aa8e49662f4472aabd5601273f23b5fd88c93..82468ec2c3f197143e80f0433b10b594792a9a9c 100644 (file)
 (Cvt32Fto64F x) -> (LDEBR x)
 (Cvt64Fto32F x) -> (LEDBR x)
 
+(Round32F x) -> (LoweredRound32F x)
+(Round64F x) -> (LoweredRound64F x)
+
 // Lowering shifts
 // Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
 //   result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
 (XOR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c^d])
 (XORconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
 (XORWconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
+(LoweredRound32F x:(FMOVSconst)) -> x
+(LoweredRound64F x:(FMOVDconst)) -> x
 
 // generic simplifications
 // TODO: more of this
 (XOR x x) -> (MOVDconst [0])
 (XORW x x) -> (MOVDconst [0])
 
+// fused multiply-add
+(FADD x (FMUL y z)) -> (FMADD x y z)
+(FADDS x (FMULS y z)) -> (FMADDS x y z)
+(FADD (FMUL y z) x) -> (FMADD x y z)
+(FADDS (FMULS y z) x) -> (FMADDS x y z)
+(FSUB (FMUL y z) x) -> (FMSUB x y z)
+(FSUBS (FMULS y z) x) -> (FMSUBS x y z)
+
 // Fold memory operations into operations.
 // Exclude global data (SB) because these instructions cannot handle relative addresses.
 // TODO(mundaym): use LARL in the assembler to handle SB?
index 40ba25294195611834993894f2a10b6a428fe34c..11f6656197fd4ea49bf1daf9714883c5d899923f 100644 (file)
@@ -141,6 +141,7 @@ func init() {
 
                fp01        = regInfo{inputs: []regMask{}, outputs: fponly}
                fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
+               fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
                fp21clobber = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
                fpgp        = regInfo{inputs: fponly, outputs: gponly}
                gpfp        = regInfo{inputs: gponly, outputs: fponly}
@@ -166,16 +167,20 @@ func init() {
 
        var S390Xops = []opData{
                // fp ops
-               {name: "FADDS", argLength: 2, reg: fp21clobber, asm: "FADDS", commutative: true, resultInArg0: true, clobberFlags: true}, // fp32 add
-               {name: "FADD", argLength: 2, reg: fp21clobber, asm: "FADD", commutative: true, resultInArg0: true, clobberFlags: true},   // fp64 add
-               {name: "FSUBS", argLength: 2, reg: fp21clobber, asm: "FSUBS", resultInArg0: true, clobberFlags: true},                    // fp32 sub
-               {name: "FSUB", argLength: 2, reg: fp21clobber, asm: "FSUB", resultInArg0: true, clobberFlags: true},                      // fp64 sub
-               {name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true, resultInArg0: true},                            // fp32 mul
-               {name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true, resultInArg0: true},                              // fp64 mul
-               {name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS", resultInArg0: true},                                               // fp32 div
-               {name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV", resultInArg0: true},                                                 // fp64 div
-               {name: "FNEGS", argLength: 1, reg: fp11clobber, asm: "FNEGS", clobberFlags: true},                                        // fp32 neg
-               {name: "FNEG", argLength: 1, reg: fp11clobber, asm: "FNEG", clobberFlags: true},                                          // fp64 neg
+               {name: "FADDS", argLength: 2, reg: fp21clobber, asm: "FADDS", commutative: true, resultInArg0: true, clobberFlags: true}, // fp32 arg0 + arg1
+               {name: "FADD", argLength: 2, reg: fp21clobber, asm: "FADD", commutative: true, resultInArg0: true, clobberFlags: true},   // fp64 arg0 + arg1
+               {name: "FSUBS", argLength: 2, reg: fp21clobber, asm: "FSUBS", resultInArg0: true, clobberFlags: true},                    // fp32 arg0 - arg1
+               {name: "FSUB", argLength: 2, reg: fp21clobber, asm: "FSUB", resultInArg0: true, clobberFlags: true},                      // fp64 arg0 - arg1
+               {name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true, resultInArg0: true},                            // fp32 arg0 * arg1
+               {name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true, resultInArg0: true},                              // fp64 arg0 * arg1
+               {name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS", resultInArg0: true},                                               // fp32 arg0 / arg1
+               {name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV", resultInArg0: true},                                                 // fp64 arg0 / arg1
+               {name: "FNEGS", argLength: 1, reg: fp11clobber, asm: "FNEGS", clobberFlags: true},                                        // fp32 -arg0
+               {name: "FNEG", argLength: 1, reg: fp11clobber, asm: "FNEG", clobberFlags: true},                                          // fp64 -arg0
+               {name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS", resultInArg0: true},                                             // fp32 arg1 * arg2 + arg0
+               {name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true},                                               // fp64 arg1 * arg2 + arg0
+               {name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true},                                             // fp32 arg1 * arg2 - arg0
+               {name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true},                                               // fp64 arg1 * arg2 - arg0
 
                {name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true}, // fp32 load
                {name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true}, // fp64 load
@@ -402,6 +407,9 @@ func init() {
                {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R12")}}},
                // arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
                {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{ptrsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
+               // Round ops to block fused-multiply-add extraction.
+               {name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true},
+               {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true},
 
                // MOVDconvert converts between pointers and integers.
                // We have a special op for this so as to not confuse GC
index e0a12dcae543123a8ee7f2111666f60990a9dbb4..f485f43875265ed7f563e65b3afa9612ecec8fcb 100644 (file)
@@ -46,6 +46,8 @@
 (Trunc64to32 (Const64 [c]))  -> (Const32  [int64(int32(c))])
 (Cvt64Fto32F (Const64F [c])) -> (Const32F [f2i(float64(i2f32(c)))])
 (Cvt32Fto64F (Const32F [c])) -> (Const64F [c]) // c is already a 64 bit float
+(Round32F x:(Const32F)) -> x
+(Round64F x:(Const64F)) -> x
 
 (Trunc16to8  (ZeroExt8to16  x)) -> x
 (Trunc32to8  (ZeroExt8to32  x)) -> x
index 3854a3954d2d2bf3d2b3d90df62fab15d3b125bc..ab8b0ab7658d2ab990fee4fe0c95a6295d1579cd 100644 (file)
@@ -343,6 +343,10 @@ var genericOps = []opData{
        {name: "Cvt32Fto64F", argLength: 1},
        {name: "Cvt64Fto32F", argLength: 1},
 
+       // Force rounding to precision of type.
+       {name: "Round32F", argLength: 1},
+       {name: "Round64F", argLength: 1},
+
        // Automatically inserted safety checks
        {name: "IsNonNil", argLength: 1, typ: "Bool"},        // arg0 != nil
        {name: "IsInBounds", argLength: 2, typ: "Bool"},      // 0 <= arg0 < arg1. arg1 is guaranteed >= 0.
index 7a962165c52136285e934da7defb48f07e3f86cd..9e103761284d2c5f2f8c2600b1b17aa810c431ea 100644 (file)
@@ -1390,6 +1390,10 @@ const (
        OpS390XFDIV
        OpS390XFNEGS
        OpS390XFNEG
+       OpS390XFMADDS
+       OpS390XFMADD
+       OpS390XFMSUBS
+       OpS390XFMSUB
        OpS390XFMOVSload
        OpS390XFMOVDload
        OpS390XFMOVSconst
@@ -1554,6 +1558,8 @@ const (
        OpS390XLoweredGetG
        OpS390XLoweredGetClosurePtr
        OpS390XLoweredNilCheck
+       OpS390XLoweredRound32F
+       OpS390XLoweredRound64F
        OpS390XMOVDconvert
        OpS390XFlagEQ
        OpS390XFlagLT
@@ -1830,6 +1836,8 @@ const (
        OpCvt64Fto64
        OpCvt32Fto64F
        OpCvt64Fto32F
+       OpRound32F
+       OpRound64F
        OpIsNonNil
        OpIsInBounds
        OpIsSliceInBounds
@@ -17446,6 +17454,70 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "FMADDS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          s390x.AFMADDS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:         "FMADD",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          s390x.AFMADD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:         "FMSUBS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          s390x.AFMSUBS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:         "FMSUB",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          s390x.AFMSUB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
        {
                name:           "FMOVSload",
                auxType:        auxSymOff,
@@ -19801,6 +19873,32 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "LoweredRound32F",
+               argLen:       1,
+               resultInArg0: true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:         "LoweredRound64F",
+               argLen:       1,
+               resultInArg0: true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
        {
                name:   "MOVDconvert",
                argLen: 2,
@@ -21457,6 +21555,16 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "Round32F",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Round64F",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "IsNonNil",
                argLen:  1,
index a396ec1976838aa7754cb80a119772da75783d90..417f59fc0ac80005f559def1eb730dc2291180be 100644 (file)
@@ -488,6 +488,10 @@ func rewriteValue386(v *Value, config *Config) bool {
                return rewriteValue386_OpOr8(v, config)
        case OpOrB:
                return rewriteValue386_OpOrB(v, config)
+       case OpRound32F:
+               return rewriteValue386_OpRound32F(v, config)
+       case OpRound64F:
+               return rewriteValue386_OpRound64F(v, config)
        case OpRsh16Ux16:
                return rewriteValue386_OpRsh16Ux16(v, config)
        case OpRsh16Ux32:
@@ -12209,6 +12213,34 @@ func rewriteValue386_OpOrB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValue386_OpRound32F(v *Value, config *Config) bool { // 386 rule for the generic Round32F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round32F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no 386-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValue386_OpRound64F(v *Value, config *Config) bool { // 386 rule for the generic Round64F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round64F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no 386-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValue386_OpRsh16Ux16(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index 2668a16867cfb8cf0a75c77d5e3d05879dbb6c91..00a554d01ddec91ed8fd678d256cb2b751e06ae8 100644 (file)
@@ -678,6 +678,10 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpOr8(v, config)
        case OpOrB:
                return rewriteValueAMD64_OpOrB(v, config)
+       case OpRound32F:
+               return rewriteValueAMD64_OpRound32F(v, config)
+       case OpRound64F:
+               return rewriteValueAMD64_OpRound64F(v, config)
        case OpRsh16Ux16:
                return rewriteValueAMD64_OpRsh16Ux16(v, config)
        case OpRsh16Ux32:
@@ -20498,6 +20502,34 @@ func rewriteValueAMD64_OpOrB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpRound32F(v *Value, config *Config) bool { // AMD64 rule for the generic Round32F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round32F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no AMD64-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRound64F(v *Value, config *Config) bool { // AMD64 rule for the generic Round64F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round64F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no AMD64-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpRsh16Ux16(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index 2ad662f8fe2947d246c1689a8cb9c421c10ab60b..a5b766ebbbe69551f277034f1bc80a331bd6fbff 100644 (file)
@@ -620,6 +620,10 @@ func rewriteValueARM(v *Value, config *Config) bool {
                return rewriteValueARM_OpOr8(v, config)
        case OpOrB:
                return rewriteValueARM_OpOrB(v, config)
+       case OpRound32F:
+               return rewriteValueARM_OpRound32F(v, config)
+       case OpRound64F:
+               return rewriteValueARM_OpRound64F(v, config)
        case OpRsh16Ux16:
                return rewriteValueARM_OpRsh16Ux16(v, config)
        case OpRsh16Ux32:
@@ -15772,6 +15776,34 @@ func rewriteValueARM_OpOrB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueARM_OpRound32F(v *Value, config *Config) bool { // ARM rule for the generic Round32F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round32F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no ARM-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueARM_OpRound64F(v *Value, config *Config) bool { // ARM rule for the generic Round64F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round64F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no ARM-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueARM_OpRsh16Ux16(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index 0e60aaad85ac0973aa7f02924b5d295a28ae21e2..93472934ae7c0fbcf1e4bd13f097ce928133ced2 100644 (file)
@@ -576,6 +576,10 @@ func rewriteValueARM64(v *Value, config *Config) bool {
                return rewriteValueARM64_OpOr8(v, config)
        case OpOrB:
                return rewriteValueARM64_OpOrB(v, config)
+       case OpRound32F:
+               return rewriteValueARM64_OpRound32F(v, config)
+       case OpRound64F:
+               return rewriteValueARM64_OpRound64F(v, config)
        case OpRsh16Ux16:
                return rewriteValueARM64_OpRsh16Ux16(v, config)
        case OpRsh16Ux32:
@@ -13172,6 +13176,34 @@ func rewriteValueARM64_OpOrB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueARM64_OpRound32F(v *Value, config *Config) bool { // ARM64 rule for the generic Round32F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round32F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no ARM64-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueARM64_OpRound64F(v *Value, config *Config) bool { // ARM64 rule for the generic Round64F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round64F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no ARM64-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueARM64_OpRsh16Ux16(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index 2c320a9216fadae3b44e8a26288bf56ce428db5f..5c45d1c481423862f5c2995421cb7cd77cc9ba51 100644 (file)
@@ -400,6 +400,10 @@ func rewriteValueMIPS(v *Value, config *Config) bool {
                return rewriteValueMIPS_OpOr8(v, config)
        case OpOrB:
                return rewriteValueMIPS_OpOrB(v, config)
+       case OpRound32F:
+               return rewriteValueMIPS_OpRound32F(v, config)
+       case OpRound64F:
+               return rewriteValueMIPS_OpRound64F(v, config)
        case OpRsh16Ux16:
                return rewriteValueMIPS_OpRsh16Ux16(v, config)
        case OpRsh16Ux32:
@@ -7063,6 +7067,34 @@ func rewriteValueMIPS_OpOrB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueMIPS_OpRound32F(v *Value, config *Config) bool { // MIPS rule for the generic Round32F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round32F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no MIPS-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueMIPS_OpRound64F(v *Value, config *Config) bool { // MIPS rule for the generic Round64F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round64F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no MIPS-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueMIPS_OpRsh16Ux16(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index f3d0fe3aa63f1b591884dcf982989d8aa75b862d..ef07ab0268853530a84bf06c36e9f719a7fafbd7 100644 (file)
@@ -436,6 +436,10 @@ func rewriteValueMIPS64(v *Value, config *Config) bool {
                return rewriteValueMIPS64_OpOr8(v, config)
        case OpOrB:
                return rewriteValueMIPS64_OpOrB(v, config)
+       case OpRound32F:
+               return rewriteValueMIPS64_OpRound32F(v, config)
+       case OpRound64F:
+               return rewriteValueMIPS64_OpRound64F(v, config)
        case OpRsh16Ux16:
                return rewriteValueMIPS64_OpRsh16Ux16(v, config)
        case OpRsh16Ux32:
@@ -7377,6 +7381,34 @@ func rewriteValueMIPS64_OpOrB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueMIPS64_OpRound32F(v *Value, config *Config) bool { // MIPS64 rule for the generic Round32F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round32F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no MIPS64-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueMIPS64_OpRound64F(v *Value, config *Config) bool { // MIPS64 rule for the generic Round64F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round64F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no MIPS64-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueMIPS64_OpRsh16Ux16(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index 2a8bc65d1b5a3bd8967261cbfdbdd04e936b250d..bb6678b590986196e5ca000c318b34d9bcb5fb9b 100644 (file)
@@ -436,6 +436,10 @@ func rewriteValuePPC64(v *Value, config *Config) bool {
                return rewriteValuePPC64_OpPPC64XOR(v, config)
        case OpPPC64XORconst:
                return rewriteValuePPC64_OpPPC64XORconst(v, config)
+       case OpRound32F:
+               return rewriteValuePPC64_OpRound32F(v, config)
+       case OpRound64F:
+               return rewriteValuePPC64_OpRound64F(v, config)
        case OpRsh16Ux16:
                return rewriteValuePPC64_OpRsh16Ux16(v, config)
        case OpRsh16Ux32:
@@ -7424,6 +7428,34 @@ func rewriteValuePPC64_OpPPC64XORconst(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValuePPC64_OpRound32F(v *Value, config *Config) bool { // PPC64 rule for the generic Round32F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round32F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no PPC64-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValuePPC64_OpRound64F(v *Value, config *Config) bool { // PPC64 rule for the generic Round64F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round64F x)
+       // cond:
+       // result: x
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpCopy) // no PPC64-specific rounding op is emitted: v becomes a copy of its operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValuePPC64_OpRsh16Ux16(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index 358c28e52948717520847f641ea1796cc223ff5e..29ea57c94811dcb8329595f37bf9e804e5e9c71a 100644 (file)
@@ -374,6 +374,10 @@ func rewriteValueS390X(v *Value, config *Config) bool {
                return rewriteValueS390X_OpOr8(v, config)
        case OpOrB:
                return rewriteValueS390X_OpOrB(v, config)
+       case OpRound32F:
+               return rewriteValueS390X_OpRound32F(v, config)
+       case OpRound64F:
+               return rewriteValueS390X_OpRound64F(v, config)
        case OpRsh16Ux16:
                return rewriteValueS390X_OpRsh16Ux16(v, config)
        case OpRsh16Ux32:
@@ -470,6 +474,10 @@ func rewriteValueS390X(v *Value, config *Config) bool {
                return rewriteValueS390X_OpS390XCMPWconst(v, config)
        case OpS390XCMPconst:
                return rewriteValueS390X_OpS390XCMPconst(v, config)
+       case OpS390XFADD:
+               return rewriteValueS390X_OpS390XFADD(v, config)
+       case OpS390XFADDS:
+               return rewriteValueS390X_OpS390XFADDS(v, config)
        case OpS390XFMOVDload:
                return rewriteValueS390X_OpS390XFMOVDload(v, config)
        case OpS390XFMOVDloadidx:
@@ -486,6 +494,14 @@ func rewriteValueS390X(v *Value, config *Config) bool {
                return rewriteValueS390X_OpS390XFMOVSstore(v, config)
        case OpS390XFMOVSstoreidx:
                return rewriteValueS390X_OpS390XFMOVSstoreidx(v, config)
+       case OpS390XFSUB:
+               return rewriteValueS390X_OpS390XFSUB(v, config)
+       case OpS390XFSUBS:
+               return rewriteValueS390X_OpS390XFSUBS(v, config)
+       case OpS390XLoweredRound32F:
+               return rewriteValueS390X_OpS390XLoweredRound32F(v, config)
+       case OpS390XLoweredRound64F:
+               return rewriteValueS390X_OpS390XLoweredRound64F(v, config)
        case OpS390XMOVBZload:
                return rewriteValueS390X_OpS390XMOVBZload(v, config)
        case OpS390XMOVBZloadidx:
@@ -4820,6 +4836,32 @@ func rewriteValueS390X_OpOrB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueS390X_OpRound32F(v *Value, config *Config) bool { // s390x rule for the generic Round32F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round32F x)
+       // cond:
+       // result: (LoweredRound32F x)
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpS390XLoweredRound32F) // kept as a distinct op instead of a copy; the FADD/FSUB fusion rules match only a direct FMUL/FMULS operand
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueS390X_OpRound64F(v *Value, config *Config) bool { // s390x rule for the generic Round64F op; always rewrites v and returns true
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round64F x)
+       // cond:
+       // result: (LoweredRound64F x)
+       for { // the body returns unconditionally, so this runs exactly once
+               x := v.Args[0]
+               v.reset(OpS390XLoweredRound64F) // kept as a distinct op instead of a copy; the FADD/FSUB fusion rules match only a direct FMUL/FMULS operand
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueS390X_OpRsh16Ux16(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -7271,6 +7313,84 @@ func rewriteValueS390X_OpS390XCMPconst(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueS390X_OpS390XFADD(v *Value, config *Config) bool { // tries to fuse an FADD that has an FMUL operand into a single FMADD; returns whether v was rewritten
+       b := v.Block
+       _ = b // b is not used by these rules; the blank assignment avoids an unused-variable compile error
+       // match: (FADD x (FMUL y z))
+       // cond:
+       // result: (FMADD x y z)
+       for { // used as a breakable block: break means this rule did not match
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XFMUL { // only a direct FMUL operand is fused; any other op here leaves the add to the next rule
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               v.reset(OpS390XFMADD)
+               v.AddArg(x) // argument order is the addend first, then the two factors of the product
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
+       // match: (FADD (FMUL y z) x)
+       // cond:
+       // result: (FMADD x y z)
+       for { // same fusion with the product as the first operand of the add
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XFMUL {
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               v.reset(OpS390XFMADD)
+               v.AddArg(x) // x is still placed first, so both rules produce the same FMADD argument layout
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
+       return false // neither operand is an FMUL, so v is left unchanged
+}
+func rewriteValueS390X_OpS390XFADDS(v *Value, config *Config) bool { // tries to fuse an FADDS that has an FMULS operand into a single FMADDS; returns whether v was rewritten
+       b := v.Block
+       _ = b // b is not used by these rules; the blank assignment avoids an unused-variable compile error
+       // match: (FADDS x (FMULS y z))
+       // cond:
+       // result: (FMADDS x y z)
+       for { // used as a breakable block: break means this rule did not match
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpS390XFMULS { // only a direct FMULS operand is fused; any other op here leaves the add to the next rule
+                       break
+               }
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               v.reset(OpS390XFMADDS)
+               v.AddArg(x) // argument order is the addend first, then the two factors of the product
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
+       // match: (FADDS (FMULS y z) x)
+       // cond:
+       // result: (FMADDS x y z)
+       for { // same fusion with the product as the first operand of the add
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XFMULS {
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               v.reset(OpS390XFMADDS)
+               v.AddArg(x) // x is still placed first, so both rules produce the same FMADDS argument layout
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
+       return false // neither operand is an FMULS, so v is left unchanged
+}
 func rewriteValueS390X_OpS390XFMOVDload(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -7899,6 +8019,86 @@ func rewriteValueS390X_OpS390XFMOVSstoreidx(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueS390X_OpS390XFSUB(v *Value, config *Config) bool { // tries to fuse an FSUB whose first operand is an FMUL into a single FMSUB; returns whether v was rewritten
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (FSUB (FMUL y z) x)
+       // cond:
+       // result: (FMSUB x y z)
+       for { // used as a breakable block: break means the rule did not match
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XFMUL { // only the product-minus-x form is matched; a product in the second operand is not handled by this function
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               v.reset(OpS390XFMSUB)
+               v.AddArg(x) // argument order is the subtracted value first, then the two factors of the product
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
+       return false // first operand is not an FMUL, so v is left unchanged
+}
+func rewriteValueS390X_OpS390XFSUBS(v *Value, config *Config) bool { // tries to fuse an FSUBS whose first operand is an FMULS into a single FMSUBS; returns whether v was rewritten
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (FSUBS (FMULS y z) x)
+       // cond:
+       // result: (FMSUBS x y z)
+       for { // used as a breakable block: break means the rule did not match
+               v_0 := v.Args[0]
+               if v_0.Op != OpS390XFMULS { // only the product-minus-x form is matched; a product in the second operand is not handled by this function
+                       break
+               }
+               y := v_0.Args[0]
+               z := v_0.Args[1]
+               x := v.Args[1]
+               v.reset(OpS390XFMSUBS)
+               v.AddArg(x) // argument order is the subtracted value first, then the two factors of the product
+               v.AddArg(y)
+               v.AddArg(z)
+               return true
+       }
+       return false // first operand is not an FMULS, so v is left unchanged
+}
+func rewriteValueS390X_OpS390XLoweredRound32F(v *Value, config *Config) bool { // removes a LoweredRound32F whose operand is an FMOVSconst; returns whether v was rewritten
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (LoweredRound32F x:(FMOVSconst))
+       // cond:
+       // result: x
+       for { // used as a breakable block: break means the rule did not match
+               x := v.Args[0]
+               if x.Op != OpS390XFMOVSconst { // a non-constant operand keeps its round op
+                       break
+               }
+               v.reset(OpCopy) // v becomes a copy of the constant operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false // operand is not an FMOVSconst, so v is left unchanged
+}
+func rewriteValueS390X_OpS390XLoweredRound64F(v *Value, config *Config) bool { // removes a LoweredRound64F whose operand is an FMOVDconst; returns whether v was rewritten
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (LoweredRound64F x:(FMOVDconst))
+       // cond:
+       // result: x
+       for { // used as a breakable block: break means the rule did not match
+               x := v.Args[0]
+               if x.Op != OpS390XFMOVDconst { // a non-constant operand keeps its round op
+                       break
+               }
+               v.reset(OpCopy) // v becomes a copy of the constant operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false // operand is not an FMOVDconst, so v is left unchanged
+}
 func rewriteValueS390X_OpS390XMOVBZload(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index 57e258b02906d9668c12dbb5b0c944cb3879b030..3033a31f98898bf810b0b33db15a5fb3aee7cc04 100644 (file)
@@ -270,6 +270,10 @@ func rewriteValuegeneric(v *Value, config *Config) bool {
                return rewriteValuegeneric_OpPhi(v, config)
        case OpPtrIndex:
                return rewriteValuegeneric_OpPtrIndex(v, config)
+       case OpRound32F:
+               return rewriteValuegeneric_OpRound32F(v, config)
+       case OpRound64F:
+               return rewriteValuegeneric_OpRound64F(v, config)
        case OpRsh16Ux16:
                return rewriteValuegeneric_OpRsh16Ux16(v, config)
        case OpRsh16Ux32:
@@ -9786,6 +9790,42 @@ func rewriteValuegeneric_OpPtrIndex(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValuegeneric_OpRound32F(v *Value, config *Config) bool { // generic rule: removes a Round32F whose operand is a Const32F; returns whether v was rewritten
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round32F x:(Const32F))
+       // cond:
+       // result: x
+       for { // used as a breakable block: break means the rule did not match
+               x := v.Args[0]
+               if x.Op != OpConst32F { // a non-constant operand keeps its Round32F
+                       break
+               }
+               v.reset(OpCopy) // v becomes a copy of the constant operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false // operand is not a Const32F, so v is left unchanged
+}
+func rewriteValuegeneric_OpRound64F(v *Value, config *Config) bool { // generic rule: removes a Round64F whose operand is a Const64F; returns whether v was rewritten
+       b := v.Block
+       _ = b // b is not used by this rule; the blank assignment avoids an unused-variable compile error
+       // match: (Round64F x:(Const64F))
+       // cond:
+       // result: x
+       for { // used as a breakable block: break means the rule did not match
+               x := v.Args[0]
+               if x.Op != OpConst64F { // a non-constant operand keeps its Round64F
+                       break
+               }
+               v.reset(OpCopy) // v becomes a copy of the constant operand
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false // operand is not a Const64F, so v is left unchanged
+}
 func rewriteValuegeneric_OpRsh16Ux16(v *Value, config *Config) bool {
        b := v.Block
        _ = b