Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: add instructions and rewrites for scalar->vector moves
author: David Chase <drchase@google.com>
Thu, 21 Aug 2025 21:07:13 +0000 (17:07 -0400)
committer: David Chase <drchase@google.com>
Tue, 2 Sep 2025 17:50:34 +0000 (10:50 -0700)
This required changes to the assembler so that VMOVSS and VMOVSD
could handle FP constants.

Change-Id: Iaa2f8df71867a3283bc058b7ec691b56a3e73621
Reviewed-on: https://go-review.googlesource.com/c/go/+/698240
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/_gen/AMD64.rules
src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/internal/obj/x86/obj6.go

index 58a0f9cc8140b72b3d73130d353d4dffc06088dd..817f6dbc1d71943f14ea26117c046408b52eb2c6 100644 (file)
@@ -1723,6 +1723,24 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = simdReg(v)
 
+       case ssa.OpAMD64VMOVQload, ssa.OpAMD64VMOVDload,
+               ssa.OpAMD64VMOVSSload, ssa.OpAMD64VMOVSDload:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = v.Args[0].Reg()
+               ssagen.AddAux(&p.From, v)
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = simdReg(v)
+
+       case ssa.OpAMD64VMOVSSconst, ssa.OpAMD64VMOVSDconst:
+               // for loading constants directly into SIMD registers
+               x := simdReg(v)
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_FCONST
+               p.From.Val = math.Float64frombits(uint64(v.AuxInt))
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = x
+
        case ssa.OpAMD64VMOVD, ssa.OpAMD64VMOVQ:
                // These are for initializing the least 32/64 bits of a SIMD register from an "int".
                p := s.Prog(v.Op.Asm())
index 0c7c7ced4375f1d1b82bf68af43fcd613ce0d0e6..2300cc3757d7e9bee3a8dc9ce16ce4a80adab3bc 100644 (file)
 (VPBROADCASTW(128|256|512) x:(VPINSRW128 [0] (Zero128    <t>) y)) && x.Uses == 1 =>
        (VPBROADCASTW(128|256|512)   (VMOVQ <types.TypeVec128> y))
 
+(VMOVQ x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (VMOVQload <v.Type> [off] {sym} ptr mem)
+(VMOVD x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (VMOVDload <v.Type> [off] {sym} ptr mem)
+
+(VMOVSDf2v x:(MOVSDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (VMOVSDload <v.Type> [off] {sym} ptr mem)
+(VMOVSSf2v x:(MOVSSload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (VMOVSSload <v.Type> [off] {sym} ptr mem)
+
+(VMOVSDf2v x:(MOVSDconst [c] )) => (VMOVSDconst [c] )
+(VMOVSSf2v x:(MOVSSconst [c] )) => (VMOVSSconst [c] )
+
index 03f38db640074b5485d939c421616d1ec74f1cd5..96001e203f1fe34ac0a442ef8951b4b853ad903c 100644 (file)
@@ -1389,6 +1389,14 @@ func init() {
                {name: "VMOVQ", argLength: 1, reg: gpv, asm: "VMOVQ"},
                {name: "VMOVD", argLength: 1, reg: gpv, asm: "VMOVD"},
 
+               {name: "VMOVQload", argLength: 2, reg: fpload, asm: "VMOVQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"},
+               {name: "VMOVDload", argLength: 2, reg: fpload, asm: "VMOVD", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"},
+               {name: "VMOVSSload", argLength: 2, reg: fpload, asm: "VMOVSS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
+               {name: "VMOVSDload", argLength: 2, reg: fpload, asm: "VMOVSD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
+
+               {name: "VMOVSSconst", reg: fp01, asm: "VMOVSS", aux: "Float32", rematerializeable: true},
+               {name: "VMOVSDconst", reg: fp01, asm: "VMOVSD", aux: "Float64", rematerializeable: true},
+
                {name: "VZEROUPPER", argLength: 0, asm: "VZEROUPPER"},
                {name: "VZEROALL", argLength: 0, asm: "VZEROALL"},
 
index 7f6e9a0282cb28c83240e81a49a5d5cdabad5c62..f0c18d081663f252b432554f00bfde682808762f 100644 (file)
@@ -1218,6 +1218,12 @@ const (
        OpAMD64VMOVSSf2v
        OpAMD64VMOVQ
        OpAMD64VMOVD
+       OpAMD64VMOVQload
+       OpAMD64VMOVDload
+       OpAMD64VMOVSSload
+       OpAMD64VMOVSDload
+       OpAMD64VMOVSSconst
+       OpAMD64VMOVSDconst
        OpAMD64VZEROUPPER
        OpAMD64VZEROALL
        OpAMD64KMOVQload
@@ -18925,6 +18931,94 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "VMOVQload",
+               auxType:        auxSymOff,
+               argLen:         2,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.AVMOVQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:           "VMOVDload",
+               auxType:        auxSymOff,
+               argLen:         2,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.AVMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:           "VMOVSSload",
+               auxType:        auxSymOff,
+               argLen:         2,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.AVMOVSS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:           "VMOVSDload",
+               auxType:        auxSymOff,
+               argLen:         2,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.AVMOVSD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:              "VMOVSSconst",
+               auxType:           auxFloat32,
+               argLen:            0,
+               rematerializeable: true,
+               asm:               x86.AVMOVSS,
+               reg: regInfo{
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:              "VMOVSDconst",
+               auxType:           auxFloat64,
+               argLen:            0,
+               rematerializeable: true,
+               asm:               x86.AVMOVSD,
+               reg: regInfo{
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:   "VZEROUPPER",
                argLen: 0,
index 469417536fddefe4045e24acfcb14b19a8f2a3f8..8fec5d5b9ad6488a66e44a375083c4b7bbeb3f6c 100644 (file)
@@ -507,6 +507,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64TESTW(v)
        case OpAMD64TESTWconst:
                return rewriteValueAMD64_OpAMD64TESTWconst(v)
+       case OpAMD64VMOVD:
+               return rewriteValueAMD64_OpAMD64VMOVD(v)
        case OpAMD64VMOVDQU16Masked512:
                return rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v)
        case OpAMD64VMOVDQU32Masked512:
@@ -515,6 +517,12 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v)
        case OpAMD64VMOVDQU8Masked512:
                return rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v)
+       case OpAMD64VMOVQ:
+               return rewriteValueAMD64_OpAMD64VMOVQ(v)
+       case OpAMD64VMOVSDf2v:
+               return rewriteValueAMD64_OpAMD64VMOVSDf2v(v)
+       case OpAMD64VMOVSSf2v:
+               return rewriteValueAMD64_OpAMD64VMOVSSf2v(v)
        case OpAMD64VPANDQ512:
                return rewriteValueAMD64_OpAMD64VPANDQ512(v)
        case OpAMD64VPBROADCASTB128:
@@ -26442,6 +26450,34 @@ func rewriteValueAMD64_OpAMD64TESTWconst(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64VMOVD(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (VMOVD x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (VMOVDload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64VMOVDload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28799,6 +28835,114 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64VMOVQ(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (VMOVQ x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (VMOVQload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64VMOVQload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VMOVSDf2v(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (VMOVSDf2v x:(MOVSDload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (VMOVSDload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVSDload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64VMOVSDload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (VMOVSDf2v x:(MOVSDconst [c] ))
+       // result: (VMOVSDconst [c] )
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVSDconst {
+                       break
+               }
+               c := auxIntToFloat64(x.AuxInt)
+               v.reset(OpAMD64VMOVSDconst)
+               v.AuxInt = float64ToAuxInt(c)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VMOVSSf2v(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (VMOVSSf2v x:(MOVSSload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (VMOVSSload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVSSload {
+                       break
+               }
+               off := auxIntToInt32(x.AuxInt)
+               sym := auxToSym(x.Aux)
+               mem := x.Args[1]
+               ptr := x.Args[0]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(x.Pos, OpAMD64VMOVSSload, v.Type)
+               v.copyOf(v0)
+               v0.AuxInt = int32ToAuxInt(off)
+               v0.Aux = symToAux(sym)
+               v0.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (VMOVSSf2v x:(MOVSSconst [c] ))
+       // result: (VMOVSSconst [c] )
+       for {
+               x := v_0
+               if x.Op != OpAMD64MOVSSconst {
+                       break
+               }
+               c := auxIntToFloat32(x.AuxInt)
+               v.reset(OpAMD64VMOVSSconst)
+               v.AuxInt = float32ToAuxInt(c)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64VPANDQ512(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 48287546b387696e7ef43450539e6600df4f52e4..9c8e5e96f82113ff284dede0629fc02cabf0f557 100644 (file)
@@ -236,7 +236,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
        // Rewrite float constants to values stored in memory.
        switch p.As {
        // Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
-       case AMOVSS:
+       case AMOVSS, AVMOVSS:
                if p.From.Type == obj.TYPE_FCONST {
                        //  f == 0 can't be used here due to -0, so use Float64bits
                        if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
@@ -272,7 +272,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
                        p.From.Offset = 0
                }
 
-       case AMOVSD:
+       case AMOVSD, AVMOVSD:
                // Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
                if p.From.Type == obj.TYPE_FCONST {
                        //  f == 0 can't be used here due to -0, so use Float64bits