Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: add SARXQload and SARXLload
author Wayne Zuo <wdvxdr@golangcn.org>
Fri, 8 Apr 2022 09:33:50 +0000 (17:33 +0800)
committer Keith Randall <khr@golang.org>
Wed, 13 Apr 2022 17:48:12 +0000 (17:48 +0000)
Change-Id: I4e8dc5009a5b8af37d71b62f1322f94806d3e9d9
Reviewed-on: https://go-review.googlesource.com/c/go/+/399056
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>
Reviewed-by: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/addressingmodes.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/bmi.go

index 9fde7753583286fb7b87bb67a43fed59229728b9..1ec86233209f1dce9e0cc1e6bf7032cb1424f33b 100644 (file)
@@ -287,7 +287,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.SetFrom3Reg(v.Args[0].Reg())
 
        case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
-               ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload:
+               ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
+               ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
                p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
                m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
                ssagen.AddAux(&m, v)
@@ -295,8 +296,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 
        case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
                ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
+               ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
                ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
-               ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8:
+               ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
+               ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
                p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
                m := obj.Addr{Type: obj.TYPE_MEM}
                memIdx(&m, v)
index c18ea68665735016f4db8843623672cba2ac849a..469ba0d494785702324415f2dbd69e7fb15bad2d 100644 (file)
@@ -344,11 +344,18 @@ var combine = map[[2]Op]Op{
        [2]Op{OpAMD64DIVSDload, OpAMD64LEAQ1}: OpAMD64DIVSDloadidx1,
        [2]Op{OpAMD64DIVSDload, OpAMD64LEAQ8}: OpAMD64DIVSDloadidx8,
 
+       [2]Op{OpAMD64SARXLload, OpAMD64ADDQ}: OpAMD64SARXLloadidx1,
+       [2]Op{OpAMD64SARXQload, OpAMD64ADDQ}: OpAMD64SARXQloadidx1,
        [2]Op{OpAMD64SHLXLload, OpAMD64ADDQ}: OpAMD64SHLXLloadidx1,
        [2]Op{OpAMD64SHLXQload, OpAMD64ADDQ}: OpAMD64SHLXQloadidx1,
        [2]Op{OpAMD64SHRXLload, OpAMD64ADDQ}: OpAMD64SHRXLloadidx1,
        [2]Op{OpAMD64SHRXQload, OpAMD64ADDQ}: OpAMD64SHRXQloadidx1,
 
+       [2]Op{OpAMD64SARXLload, OpAMD64LEAQ1}: OpAMD64SARXLloadidx1,
+       [2]Op{OpAMD64SARXLload, OpAMD64LEAQ4}: OpAMD64SARXLloadidx4,
+       [2]Op{OpAMD64SARXLload, OpAMD64LEAQ8}: OpAMD64SARXLloadidx8,
+       [2]Op{OpAMD64SARXQload, OpAMD64LEAQ1}: OpAMD64SARXQloadidx1,
+       [2]Op{OpAMD64SARXQload, OpAMD64LEAQ8}: OpAMD64SARXQloadidx8,
        [2]Op{OpAMD64SHLXLload, OpAMD64LEAQ1}: OpAMD64SHLXLloadidx1,
        [2]Op{OpAMD64SHLXLload, OpAMD64LEAQ4}: OpAMD64SHLXLloadidx4,
        [2]Op{OpAMD64SHLXLload, OpAMD64LEAQ8}: OpAMD64SHLXLloadidx8,
index 3a9de8dd03f5236910cca281dde9ddbd5fdf2289..2ffdea3d559c5d7bab675aa99e72c68f9fe3b318 100644 (file)
   && clobber(x0, x1, sh)
   => @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
 
+(SARX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem)
 (SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)  => (SHLX(Q|L)load [off] {sym} ptr x mem)
 (SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)  => (SHRX(Q|L)load [off] {sym} ptr x mem)
index 2eec6e03249da4dcf30d20c72835c3d338a5c489..23c157c2c56af0e78d36bc2ad62389ec3c800e4d 100644 (file)
@@ -956,11 +956,18 @@ func init() {
                {name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64
                {name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32
 
+               {name: "SARXLload", argLength: 3, reg: gp21shxload, asm: "SARXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
+               {name: "SARXQload", argLength: 3, reg: gp21shxload, asm: "SARXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64
                {name: "SHLXLload", argLength: 3, reg: gp21shxload, asm: "SHLXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 32
                {name: "SHLXQload", argLength: 3, reg: gp21shxload, asm: "SHLXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 64
                {name: "SHRXLload", argLength: 3, reg: gp21shxload, asm: "SHRXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
                {name: "SHRXQload", argLength: 3, reg: gp21shxload, asm: "SHRXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64
 
+               {name: "SARXLloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SARXL", scale: 1, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32
+               {name: "SARXLloadidx4", argLength: 4, reg: gp21shxloadidx, asm: "SARXL", scale: 4, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+4*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32
+               {name: "SARXLloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SARXL", scale: 8, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32
+               {name: "SARXQloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SARXQ", scale: 1, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64
+               {name: "SARXQloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SARXQ", scale: 8, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64
                {name: "SHLXLloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 1, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+1*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32
                {name: "SHLXLloadidx4", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 4, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+4*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32
                {name: "SHLXLloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 8, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+8*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32
index 976d8873216bd8368a612b4cbbba86bc291c6bf2..c66c8d33d4a03473a7ce3b8b8821e769ebc4c322 100644 (file)
@@ -1064,10 +1064,17 @@ const (
        OpAMD64MOVBEQstoreidx8
        OpAMD64SARXQ
        OpAMD64SARXL
+       OpAMD64SARXLload
+       OpAMD64SARXQload
        OpAMD64SHLXLload
        OpAMD64SHLXQload
        OpAMD64SHRXLload
        OpAMD64SHRXQload
+       OpAMD64SARXLloadidx1
+       OpAMD64SARXLloadidx4
+       OpAMD64SARXLloadidx8
+       OpAMD64SARXQloadidx1
+       OpAMD64SARXQloadidx8
        OpAMD64SHLXLloadidx1
        OpAMD64SHLXLloadidx4
        OpAMD64SHLXLloadidx8
@@ -14147,6 +14154,40 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "SARXLload",
+               auxType:        auxSymOff,
+               argLen:         3,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ASARXL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:           "SARXQload",
+               auxType:        auxSymOff,
+               argLen:         3,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ASARXQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
        {
                name:           "SHLXLload",
                auxType:        auxSymOff,
@@ -14215,6 +14256,101 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "SARXLloadidx1",
+               auxType:        auxSymOff,
+               argLen:         4,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ASARXL,
+               scale:          1,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:           "SARXLloadidx4",
+               auxType:        auxSymOff,
+               argLen:         4,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ASARXL,
+               scale:          4,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:           "SARXLloadidx8",
+               auxType:        auxSymOff,
+               argLen:         4,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ASARXL,
+               scale:          8,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:           "SARXQloadidx1",
+               auxType:        auxSymOff,
+               argLen:         4,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ASARXQ,
+               scale:          1,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:           "SARXQloadidx8",
+               auxType:        auxSymOff,
+               argLen:         4,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ASARXQ,
+               scale:          8,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
        {
                name:           "SHLXLloadidx1",
                auxType:        auxSymOff,
index 81f1f1ae4eb9efcf77f88b5494e53d37ad621c4e..ecea8f09623e850ce8e831ecfd75a27995a8aef7 100644 (file)
@@ -20565,6 +20565,28 @@ func rewriteValueAMD64_OpAMD64SARXL(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
+       // match: (SARXL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (SARXLload [off] {sym} ptr x mem)
+       for {
+               l := v_0
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               x := v_1
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SARXLload)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SARXQ(v *Value) bool {
@@ -20761,6 +20783,28 @@ func rewriteValueAMD64_OpAMD64SARXQ(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
+       // match: (SARXQ l:(MOVQload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (SARXQload [off] {sym} ptr x mem)
+       for {
+               l := v_0
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               x := v_1
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SARXQload)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, x, mem)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SBBLcarrymask(v *Value) bool {
index 9dd2b0039ceafe6d4593f2ae65fa52e65d3c0531..1641d5ddd08d58cd53f6a0413ad096d1366b4b6c 100644 (file)
@@ -56,6 +56,22 @@ func sarx32(x, y int32) int32 {
        return x >> y
 }
 
+func sarx64_load(x []int64, i int) int64 {
+       // amd64/v3: `SARXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+       s := x[i] >> (i & 63)
+       // amd64/v3: `SARXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+       s = x[i+1] >> (s & 63)
+       return s
+}
+
+func sarx32_load(x []int32, i int) int32 {
+       // amd64/v3: `SARXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+       s := x[i] >> (i & 63)
+       // amd64/v3: `SARXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+       s = x[i+1] >> (s & 63)
+       return s
+}
+
 func shlrx64(x []uint64, i int, s uint64) uint64 {
        // amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
        s = x[i] >> i