Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify Ctz{32,64} and Bswap{32,64} on s390x
author: Michael Munday <munday@ca.ibm.com>
Sat, 17 Sep 2016 01:42:18 +0000 (21:42 -0400)
committer: Michael Munday <munday@ca.ibm.com>
Mon, 19 Sep 2016 19:03:01 +0000 (19:03 +0000)
Also adds the 'find leftmost one' instruction (FLOGR) and replaces the
WORD-encoded use of FLOGR in math/big with it.

Change-Id: I18e7cd19e75b8501a6ae8bd925471f7e37ded206
Reviewed-on: https://go-review.googlesource.com/29372
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

13 files changed:
src/cmd/asm/internal/asm/testdata/s390x.s
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/s390x/prog.go
src/cmd/compile/internal/s390x/ssa.go
src/cmd/compile/internal/ssa/gen/S390X.rules
src/cmd/compile/internal/ssa/gen/S390XOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteS390X.go
src/cmd/internal/obj/s390x/a.out.go
src/cmd/internal/obj/s390x/anames.go
src/cmd/internal/obj/s390x/asmz.go
src/math/big/arith_s390x.s
test/intrinsic.go

index 6b6e2236d46456253f41c78d69b7ee93145eee35..3a01f29419bca7ea0d302f44ac67c644b4542b99 100644 (file)
@@ -104,6 +104,7 @@ TEXT main·foo(SB),7,$16-0 // TEXT main.foo(SB), 7, $16-0
        NEG     R1, R2                // b9030021
        NEGW    R1                    // b9130011
        NEGW    R1, R2                // b9130021
+       FLOGR   R2, R2                // b9830022
 
        LAA     R1, R2, 524287(R3)    // eb213fff7ff8
        LAAG    R4, R5, -524288(R6)   // eb54600080e8
index 5927fde86eb7b035e8b9399b9d1c116f6a9c5ff1..44be52b93775f8f18cddc9515c40375ab0732e18 100644 (file)
@@ -2519,16 +2519,16 @@ func intrinsicInit() {
                /******** runtime/internal/sys ********/
                intrinsicKey{"runtime/internal/sys", "Ctz32"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        return s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
-               }, sys.AMD64, sys.ARM64, sys.ARM),
+               }, sys.AMD64, sys.ARM64, sys.ARM, sys.S390X),
                intrinsicKey{"runtime/internal/sys", "Ctz64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        return s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
-               }, sys.AMD64, sys.ARM64, sys.ARM),
+               }, sys.AMD64, sys.ARM64, sys.ARM, sys.S390X),
                intrinsicKey{"runtime/internal/sys", "Bswap32"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        return s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
-               }, sys.AMD64, sys.ARM64, sys.ARM),
+               }, sys.AMD64, sys.ARM64, sys.ARM, sys.S390X),
                intrinsicKey{"runtime/internal/sys", "Bswap64"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
                        return s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
-               }, sys.AMD64, sys.ARM64, sys.ARM),
+               }, sys.AMD64, sys.ARM64, sys.ARM, sys.S390X),
 
                /******** runtime/internal/atomic ********/
                intrinsicKey{"runtime/internal/atomic", "Load"}: enableOnArch(func(s *state, n *Node) *ssa.Value {
index c0920b2e8bbdc275778fe18d197b75dfeb691de6..1dd5740f8263b0cf4718e3116065a6216177789c 100644 (file)
@@ -67,6 +67,7 @@ var progtable = [s390x.ALAST & obj.AMask]obj.ProgInfo{
        s390x.AMODDU & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
        s390x.AMODW & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
        s390x.AMODWU & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       s390x.AFLOGR & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite},
 
        // Floating point.
        s390x.AFADD & obj.AMask:  {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
index e6c5849e1c669c142e805f087a86794b9ff17e7b..2e21f7b0d8c1f6ee541eb25fe136c586bcac83c8 100644 (file)
@@ -525,16 +525,13 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                if gc.Maxarg < v.AuxInt {
                        gc.Maxarg = v.AuxInt
                }
-       case ssa.OpS390XNEG, ssa.OpS390XNEGW:
-               r := v.Reg()
+       case ssa.OpS390XFLOGR, ssa.OpS390XNEG, ssa.OpS390XNEGW,
+               ssa.OpS390XMOVWBR, ssa.OpS390XMOVDBR:
                p := gc.Prog(v.Op.Asm())
-               r1 := v.Args[0].Reg()
-               if r != r1 {
-                       p.From.Type = obj.TYPE_REG
-                       p.From.Reg = r1
-               }
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[0].Reg()
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = r
+               p.To.Reg = v.Reg()
        case ssa.OpS390XNOT, ssa.OpS390XNOTW:
                v.Fatalf("NOT/NOTW generated %s", v.LongString())
        case ssa.OpS390XMOVDEQ, ssa.OpS390XMOVDNE,
index 264e2805f99c1ac7467f0364da67a0cba8b2fb1c..44fdd146b1ea5e7e7a5015da9ef20cd990405e36 100644 (file)
 (OffPtr [off] ptr) && is32Bit(off) -> (ADDconst [off] ptr)
 (OffPtr [off] ptr) -> (ADD (MOVDconst [off]) ptr)
 
+// Ctz(x) = 64 - findLeftmostOne((x-1)&^x)
+(Ctz64 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (AND <t> (SUBconst <t> [1] x) (NOT <t> x))))
+(Ctz32 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW <t> (SUBWconst <t> [1] x) (NOTW <t> x)))))
+
+(Bswap64 x) -> (MOVDBR x)
+(Bswap32 x) -> (MOVWBR x)
+
 (Sqrt x) -> (FSQRT x)
 
 // Lowering extension
index 69fcc4bf659e67cb364e7747e9c1de9d8f144339..9c362ae5e66dca03178aea14e3dabc59dcd80982 100644 (file)
@@ -315,6 +315,9 @@ func init() {
                {name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true},                  // ditto, sign extend to int64
                {name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "UInt64", clobberFlags: true, faultOnNilArg0: true},   // load 8 bytes from arg0+auxint+aux. arg1=mem
 
+               {name: "MOVWBR", argLength: 1, reg: gp11, asm: "MOVWBR"}, // arg0 swap bytes
+               {name: "MOVDBR", argLength: 1, reg: gp11, asm: "MOVDBR"}, // arg0 swap bytes
+
                {name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "UInt16", clobberFlags: true, faultOnNilArg0: true}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
                {name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "UInt32", clobberFlags: true, faultOnNilArg0: true}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
                {name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "UInt64", clobberFlags: true, faultOnNilArg0: true}, // load 8 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
@@ -387,6 +390,16 @@ func init() {
                {name: "FlagLT"}, // <
                {name: "FlagGT"}, // >
 
+               // find leftmost one
+               {
+                       name:         "FLOGR",
+                       argLength:    1,
+                       reg:          regInfo{inputs: gponly, outputs: []regMask{buildReg("R0")}, clobbers: buildReg("R1")},
+                       asm:          "FLOGR",
+                       typ:          "UInt64",
+                       clobberFlags: true,
+               },
+
                // store multiple
                {
                        name:           "STMG2",
index 8c2814658dda455731c7f6e1d1549354c316e40b..8ae954227c5e4748cf079d9b74e86131c413ef29 100644 (file)
@@ -1354,6 +1354,8 @@ const (
        OpS390XMOVWZload
        OpS390XMOVWload
        OpS390XMOVDload
+       OpS390XMOVWBR
+       OpS390XMOVDBR
        OpS390XMOVHBRload
        OpS390XMOVWBRload
        OpS390XMOVDBRload
@@ -1391,6 +1393,7 @@ const (
        OpS390XFlagEQ
        OpS390XFlagLT
        OpS390XFlagGT
+       OpS390XFLOGR
        OpS390XSTMG2
        OpS390XSTMG3
        OpS390XSTMG4
@@ -17110,6 +17113,32 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "MOVWBR",
+               argLen: 1,
+               asm:    s390x.AMOVWBR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                       },
+                       outputs: []outputInfo{
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                       },
+               },
+       },
+       {
+               name:   "MOVDBR",
+               argLen: 1,
+               asm:    s390x.AMOVDBR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                       },
+                       outputs: []outputInfo{
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                       },
+               },
+       },
        {
                name:           "MOVHBRload",
                auxType:        auxSymOff,
@@ -17581,6 +17610,21 @@ var opcodeTable = [...]opInfo{
                argLen: 0,
                reg:    regInfo{},
        },
+       {
+               name:         "FLOGR",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          s390x.AFLOGR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                       },
+                       clobbers: 2, // R1
+                       outputs: []outputInfo{
+                               {0, 1}, // R0
+                       },
+               },
+       },
        {
                name:           "STMG2",
                auxType:        auxSymOff,
index 2b03ce4b100b689c28f39c4e17fb322fe28eb893..2cd878a31dbc8deae5a51114c6f778f173fd4509 100644 (file)
@@ -36,6 +36,10 @@ func rewriteValueS390X(v *Value, config *Config) bool {
                return rewriteValueS390X_OpAndB(v, config)
        case OpAvg64u:
                return rewriteValueS390X_OpAvg64u(v, config)
+       case OpBswap32:
+               return rewriteValueS390X_OpBswap32(v, config)
+       case OpBswap64:
+               return rewriteValueS390X_OpBswap64(v, config)
        case OpClosureCall:
                return rewriteValueS390X_OpClosureCall(v, config)
        case OpCom16:
@@ -64,6 +68,10 @@ func rewriteValueS390X(v *Value, config *Config) bool {
                return rewriteValueS390X_OpConstNil(v, config)
        case OpConvert:
                return rewriteValueS390X_OpConvert(v, config)
+       case OpCtz32:
+               return rewriteValueS390X_OpCtz32(v, config)
+       case OpCtz64:
+               return rewriteValueS390X_OpCtz64(v, config)
        case OpCvt32Fto32:
                return rewriteValueS390X_OpCvt32Fto32(v, config)
        case OpCvt32Fto64:
@@ -887,6 +895,32 @@ func rewriteValueS390X_OpAvg64u(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueS390X_OpBswap32(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Bswap32 x)
+       // cond:
+       // result: (MOVWBR x)
+       for {
+               x := v.Args[0]
+               v.reset(OpS390XMOVWBR)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueS390X_OpBswap64(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Bswap64 x)
+       // cond:
+       // result: (MOVDBR x)
+       for {
+               x := v.Args[0]
+               v.reset(OpS390XMOVDBR)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueS390X_OpClosureCall(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -1078,6 +1112,62 @@ func rewriteValueS390X_OpConvert(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueS390X_OpCtz32(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Ctz32 <t> x)
+       // cond:
+       // result: (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW <t> (SUBWconst <t> [1] x) (NOTW <t> x)))))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v.reset(OpS390XSUB)
+               v0 := b.NewValue0(v.Line, OpS390XMOVDconst, config.fe.TypeUInt64())
+               v0.AuxInt = 64
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Line, OpS390XFLOGR, config.fe.TypeUInt64())
+               v2 := b.NewValue0(v.Line, OpS390XMOVWZreg, config.fe.TypeUInt64())
+               v3 := b.NewValue0(v.Line, OpS390XANDW, t)
+               v4 := b.NewValue0(v.Line, OpS390XSUBWconst, t)
+               v4.AuxInt = 1
+               v4.AddArg(x)
+               v3.AddArg(v4)
+               v5 := b.NewValue0(v.Line, OpS390XNOTW, t)
+               v5.AddArg(x)
+               v3.AddArg(v5)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v.AddArg(v1)
+               return true
+       }
+}
+func rewriteValueS390X_OpCtz64(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Ctz64 <t> x)
+       // cond:
+       // result: (SUB (MOVDconst [64]) (FLOGR (AND <t> (SUBconst <t> [1] x) (NOT <t> x))))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v.reset(OpS390XSUB)
+               v0 := b.NewValue0(v.Line, OpS390XMOVDconst, config.fe.TypeUInt64())
+               v0.AuxInt = 64
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Line, OpS390XFLOGR, config.fe.TypeUInt64())
+               v2 := b.NewValue0(v.Line, OpS390XAND, t)
+               v3 := b.NewValue0(v.Line, OpS390XSUBconst, t)
+               v3.AuxInt = 1
+               v3.AddArg(x)
+               v2.AddArg(v3)
+               v4 := b.NewValue0(v.Line, OpS390XNOT, t)
+               v4.AddArg(x)
+               v2.AddArg(v4)
+               v1.AddArg(v2)
+               v.AddArg(v1)
+               return true
+       }
+}
 func rewriteValueS390X_OpCvt32Fto32(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index 0f23931e003b393586a72d5fd145a9fb5af87865..b6d1975c79bd58fc0e597ff574f12a10f5830df2 100644 (file)
@@ -255,6 +255,9 @@ const (
        AMOVDLT
        AMOVDNE
 
+       // find leftmost one
+       AFLOGR
+
        // integer bitwise
        AAND
        AANDN
index 00a8d4126de4eb5fcd4449879e8599f10d1898be..589206f3aef032c5e7a8da111a984da3519b1ddc 100644 (file)
@@ -49,6 +49,7 @@ var Anames = []string{
        "MOVDLE",
        "MOVDLT",
        "MOVDNE",
+       "FLOGR",
        "AND",
        "ANDN",
        "NAND",
index 9d072041d04a5129968098935f4e1aa8e6238230..700137c322f83259222aef784d3c4b336bebce3e 100644 (file)
@@ -220,6 +220,9 @@ var optab = []Optab{
        // move on condition
        Optab{AMOVDEQ, C_REG, C_NONE, C_NONE, C_REG, 17, 0},
 
+       // find leftmost one
+       Optab{AFLOGR, C_REG, C_NONE, C_NONE, C_REG, 8, 0},
+
        // compare
        Optab{ACMP, C_REG, C_NONE, C_NONE, C_REG, 70, 0},
        Optab{ACMP, C_REG, C_NONE, C_NONE, C_LCON, 71, 0},
@@ -2864,6 +2867,13 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
                }
                zRSY(opcode, uint32(r1), uint32(r3), uint32(b2), uint32(d2), asm)
 
+       case 8: // find leftmost one
+               if p.To.Reg&1 != 0 {
+                       ctxt.Diag("target must be an even-numbered register")
+               }
+               // FLOGR also writes a mask to p.To.Reg+1.
+               zRRE(op_FLOGR, uint32(p.To.Reg), uint32(p.From.Reg), asm)
+
        case 10: // subtract reg [reg] reg
                r := int(p.Reg)
 
index 21929c169f93f9cb23a769aae2d8ea3f90470ac7..0a27eb9bcb0bca120a24ef040651dab4d357f375 100644 (file)
@@ -559,9 +559,9 @@ E7: SUB  $1, R7             // i--
 
 // func bitLen(x Word) (n int)
 TEXT ·bitLen(SB),NOSPLIT,$0
-       MOVD x+0(FP), R2
-       WORD $0xb9830022 // FLOGR R2,R2
-       MOVD $64, R3
-       SUB  R2, R3
-       MOVD R3, n+8(FP)
+       MOVD  x+0(FP), R2
+       FLOGR R2, R2 // clobbers R3
+       MOVD  $64, R3
+       SUB   R2, R3
+       MOVD  R3, n+8(FP)
        RET
index 3e3ec12fa47b050b07f1d641b25eeea60732067a..0b783d15df41a2f83d24fd854a284fcfbdbbb3bd 100644 (file)
@@ -1,5 +1,5 @@
 // errorcheckandrundir -0 -d=ssa/intrinsics/debug
-// +build amd64 arm64 arm
+// +build amd64 arm64 arm s390x
 
 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style