From a4a0f9b1481e9be4361eb9c4998eca08e2eedb0a Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Mon, 7 Jun 2021 14:11:20 -0500 Subject: [PATCH] cmd/compile: make XER allocatable register on PPC64 This is the first step towards decomposing aggregate operations which create or consume the CA bit of the XER. This helps optimize the canned sequence of Add64Carry (and Sub64Borrow if it were implemented similarly) by minimizing extraneous operations related to loading the CA bit, reloading CA in chained operations, or extracting it when unused. Likewise, mark the operations which clobber CA. Change-Id: I33e6dd2654a8cc39fcdbb9690a495f03558cdc97 Reviewed-on: https://go-review.googlesource.com/c/go/+/346869 Trust: Paul Murphy Reviewed-by: Cherry Mui Run-TryBot: Cherry Mui TryBot-Result: Gopher Robot --- src/cmd/compile/internal/ssa/config.go | 1 + src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 57 ++++++++++---------- src/cmd/compile/internal/ssa/opGen.go | 17 +++--- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index fa5b02b325..b9c98bdba9 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -239,6 +239,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo c.registers = registersPPC64[:] c.gpRegMask = gpRegMaskPPC64 c.fpRegMask = fpRegMaskPPC64 + c.specialRegMask = specialRegMaskPPC64 c.intParamRegs = paramIntRegPPC64 c.floatParamRegs = paramFloatRegPPC64 c.FPReg = framepointerRegPPC64 diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index ff78be263e..c079c1cedb 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -98,7 +98,6 @@ var regNamesPPC64 = []string{ // "CR7", // "CR", - // "XER", // "LR", // "CTR", } @@ -125,11 +124,12 @@ func init() { } var ( - gp = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29") - fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30") - sp = buildReg("SP") - sb = buildReg("SB") - gr = buildReg("g") + gp = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29") + fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30") + sp = buildReg("SP") + sb = buildReg("SB") + gr = buildReg("g") + xer = buildReg("XER") // cr = buildReg("CR") // ctr = buildReg("CTR") // lr = buildReg("LR") @@ -139,8 +139,10 @@ func init() { // tls = buildReg("R13") gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}} + gp11cxer = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer} gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}} gp21a0 = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}} + gp21cxer = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer} gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}} gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}} gp32 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}} @@ -168,21 +170,21 @@ func init() { fploadidx = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}} fpstore = regInfo{inputs: []regMask{gp | sp | sb, fp}} fpstoreidx = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}} - callerSave = regMask(gp | fp | gr) + callerSave = regMask(gp | fp | gr | xer) r3 = buildReg("R3") r4 = buildReg("R4") r5 = buildReg("R5") r6 = buildReg("R6") ) ops := []opData{ - {name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1 - {name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"}, // arg0 + auxInt - {name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true}, // arg0+arg1 - {name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1 - {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0-arg1 - {name: "SUBFCconst", argLength: 1, reg: gp11, asm: "SUBC", aux: "Int64"}, // auxInt - arg0 (with carry) - {name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1 - {name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1 + {name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1 + {name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"}, // arg0 + auxInt + {name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true}, // arg0+arg1 + {name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1 + {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0-arg1 + {name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"}, // auxInt - arg0 (carry is ignored) + {name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1 + {name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1 {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit) {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit) @@ -204,12 +206,12 @@ func init() { {name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"}, // arg0*arg1 - arg2 {name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2 - {name: "SRAD", argLength: 2, reg: gp21, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!) - {name: "SRAW", argLength: 2, reg: gp21, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width - {name: "SRD", argLength: 2, reg: gp21, asm: "SRD"}, // unsigned arg0 >> (arg1&127), 64 bit width - {name: "SRW", argLength: 2, reg: gp21, asm: "SRW"}, // unsigned arg0 >> (arg1&63), 32 bit width - {name: "SLD", argLength: 2, reg: gp21, asm: "SLD"}, // arg0 << (arg1&127), 64 bit width - {name: "SLW", argLength: 2, reg: gp21, asm: "SLW"}, // arg0 << (arg1&63), 32 bit width + {name: "SRAD", argLength: 2, reg: gp21cxer, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!) + {name: "SRAW", argLength: 2, reg: gp21cxer, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width + {name: "SRD", argLength: 2, reg: gp21, asm: "SRD"}, // unsigned arg0 >> (arg1&127), 64 bit width + {name: "SRW", argLength: 2, reg: gp21, asm: "SRW"}, // unsigned arg0 >> (arg1&63), 32 bit width + {name: "SLD", argLength: 2, reg: gp21, asm: "SLD"}, // arg0 << (arg1&127), 64 bit width + {name: "SLW", argLength: 2, reg: gp21, asm: "SLW"}, // arg0 << (arg1&63), 32 bit width {name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"}, // arg0 rotate left by arg1 mod 64 {name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32 @@ -221,12 +223,12 @@ func init() { {name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true}, // arg0 + arg1 + carry, returns (sum, carry) - {name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width - {name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width - {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"}, // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width - {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"}, // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width - {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"}, // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width - {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"}, // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width + {name: "SRADconst", argLength: 1, reg: gp11cxer, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width + {name: "SRAWconst", argLength: 1, reg: gp11cxer, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width + {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"}, // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width + {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"}, // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width + {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"}, // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width + {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"}, // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width {name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits @@ -722,6 +724,7 @@ func init() { ParamFloatRegNames: "F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12", gpregmask: gp, fpregmask: fp, + specialregmask: xer, framepointerreg: -1, linkreg: -1, // not used }) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 5b379a7fd3..05ed0220ff 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -25134,6 +25134,7 @@ var opcodeTable = [...]opInfo{ inputs: []inputInfo{ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, + clobbers: 9223372036854775808, // XER outputs: []outputInfo{ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, @@ -25414,6 +25415,7 @@ var opcodeTable = [...]opInfo{ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, + clobbers: 9223372036854775808, // XER outputs: []outputInfo{ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, @@ -25428,6 +25430,7 @@ var opcodeTable = [...]opInfo{ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, + clobbers: 9223372036854775808, // XER outputs: []outputInfo{ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, @@ -25584,6 +25587,7 @@ var opcodeTable = [...]opInfo{ inputs: []inputInfo{ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, + clobbers: 9223372036854775808, // XER outputs: []outputInfo{ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, @@ -25598,6 +25602,7 @@ var opcodeTable = [...]opInfo{ inputs: []inputInfo{ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, + clobbers: 9223372036854775808, // XER outputs: []outputInfo{ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 }, @@ -27461,7 +27466,7 @@ var opcodeTable = [...]opInfo{ clobberFlags: true, call: true, reg: regInfo{ - clobbers: 9223372034707283960, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 g F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 + clobbers: 18446744071562059768, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 g F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 XER }, }, { @@ -27472,7 +27477,7 @@ var opcodeTable = [...]opInfo{ call: true, tailCall: true, reg: regInfo{ - clobbers: 9223372034707283960, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 g F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 + clobbers: 18446744071562059768, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 g F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 XER }, }, { @@ -27486,7 +27491,7 @@ var opcodeTable = [...]opInfo{ {0, 4096}, // R12 {1, 2048}, // R11 }, - clobbers: 9223372034707283960, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 g F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 + clobbers: 18446744071562059768, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 g F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 XER }, }, { @@ -27499,7 +27504,7 @@ var opcodeTable = [...]opInfo{ inputs: []inputInfo{ {0, 4096}, // R12 }, - clobbers: 9223372034707283960, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 g F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 + clobbers: 18446744071562059768, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 g F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 XER }, }, { @@ -27882,7 +27887,7 @@ var opcodeTable = [...]opInfo{ {0, 1048576}, // R20 {1, 2097152}, // R21 }, - clobbers: 9223372035777632256, // R11 R12 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 + clobbers: 18446744072632408064, // R11 R12 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 XER }, }, { @@ -37287,7 +37292,7 @@ var paramIntRegPPC64 = []int8{3, 4, 5, 6, 7, 8, 9, 10, 14, 15, 16, 17} var paramFloatRegPPC64 = []int8{33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44} var gpRegMaskPPC64 = regMask(1073733624) var fpRegMaskPPC64 = regMask(9223372032559808512) -var specialRegMaskPPC64 = regMask(0) +var specialRegMaskPPC64 = regMask(9223372036854775808) var framepointerRegPPC64 = int8(-1) var linkRegPPC64 = int8(-1) var registersRISCV64 = [...]Register{ -- 2.48.1