]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: wire up bits.Reverse intrinsics for loong64
authorXiaolin Zhao <zhaoxiaolin@loongson.cn>
Sat, 2 Nov 2024 07:40:13 +0000 (15:40 +0800)
committerabner chenc <chenguoqi@loongson.cn>
Mon, 11 Nov 2024 00:08:45 +0000 (00:08 +0000)
Micro-benchmark results on Loongson 3A5000 and 3A6000:

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000 @ 2500.00MHz
          |  CL 624576   |               this CL                |
          |    sec/op    |    sec/op     vs base                |
Reverse     2.8130n ± 0%   0.8008n ± 0%  -71.53% (p=0.000 n=20)
Reverse8    0.7014n ± 0%   0.4040n ± 0%  -42.40% (p=0.000 n=20)
Reverse16   1.2975n ± 0%   0.6632n ± 1%  -48.89% (p=0.000 n=20)
Reverse32   2.7520n ± 0%   0.4042n ± 0%  -85.31% (p=0.000 n=20)
Reverse64   2.8970n ± 0%   0.4041n ± 0%  -86.05% (p=0.000 n=20)
geomean      1.828n        0.5116n       -72.01%

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
          |  CL 624576   |               this CL                |
          |    sec/op    |    sec/op     vs base                |
Reverse     4.0050n ± 0%   0.8011n ± 0%  -80.00% (p=0.000 n=20)
Reverse8    0.8010n ± 0%   0.5210n ± 1%  -34.96% (p=0.000 n=20)
Reverse16   1.6160n ± 0%   0.6008n ± 0%  -62.82% (p=0.000 n=20)
Reverse32   3.8550n ± 0%   0.5179n ± 0%  -86.57% (p=0.000 n=20)
Reverse64   3.8050n ± 0%   0.5177n ± 0%  -86.40% (p=0.000 n=20)
geomean      2.378n        0.5828n       -75.49%

Updates #59120

This patch is a copy of CL 483656.
Co-authored-by: WANG Xuerui <git@xen0n.name>
Change-Id: I98681091763279279c8404bd0295785f13ea1c8e
Reviewed-on: https://go-review.googlesource.com/c/go/+/624276
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/loong64/ssa.go
src/cmd/compile/internal/ssa/_gen/LOONG64.rules
src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteLOONG64.go
src/cmd/compile/internal/ssagen/intrinsics.go
src/cmd/compile/internal/ssagen/intrinsics_test.go
test/codegen/mathbits.go

index e7cb82a280a9d794aee69530ef4e6d16904f1530..4c9bcfe46edf7719bd33b46872f7fcdaedcd47e5 100644 (file)
@@ -516,6 +516,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpLOONG64REVB2H,
                ssa.OpLOONG64REVB2W,
                ssa.OpLOONG64REVBV,
+               ssa.OpLOONG64BITREV4B,
+               ssa.OpLOONG64BITREVW,
+               ssa.OpLOONG64BITREVV,
                ssa.OpLOONG64ABSD:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
index bac1f27b1dceec37d32eaa861f8f79d7a916d30c..6ff98a46f75f14a19a312793b2a05d4670035ca6 100644 (file)
 (BitLen64 <t> x) => (NEGV <t> (SUBVconst <t> [64] (CLZV <t> x)))
 (BitLen32 <t> x) => (NEGV <t> (SUBVconst <t> [32] (CLZW <t> x)))
 (Bswap(16|32|64) ...) => (REVB(2H|2W|V) ...)
+(BitRev8 ...) => (BITREV4B ...)
+(BitRev16 <t> x) => (REVB2H (BITREV4B <t> x))
+(BitRev32 ...) => (BITREVW ...)
+(BitRev64 ...) => (BITREVV ...)
 
 // math package intrinsics
 (Sqrt ...) => (SQRTD ...)
index 0da0bb82279ac5e8c394a452abb45369b1344f17..a8a38ee7b8c6a7049e73db98870319c7f5c89e32 100644 (file)
@@ -216,6 +216,10 @@ func init() {
                {name: "REVB2W", argLength: 1, reg: gp11, asm: "REVB2W"}, // Swap bytes: 0x1122334455667788 -> 0x4433221188776655
                {name: "REVBV", argLength: 1, reg: gp11, asm: "REVBV"},   // Swap bytes: 0x1122334455667788 -> 0x8877665544332211
 
+               {name: "BITREV4B", argLength: 1, reg: gp11, asm: "BITREV4B"}, // Reverse the bits of each byte inside a 32-bit arg[0]
+               {name: "BITREVW", argLength: 1, reg: gp11, asm: "BITREVW"},   // Reverse the bits in a 32-bit arg[0]
+               {name: "BITREVV", argLength: 1, reg: gp11, asm: "BITREVV"},   // Reverse the bits in a 64-bit arg[0]
+
                {name: "FMINF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMINF", commutative: true, typ: "Float32"}, // min(arg0, arg1), float32
                {name: "FMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1), float64
                {name: "FMAXF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXF", commutative: true, typ: "Float32"}, // max(arg0, arg1), float32
index f1006a3f3cad555bebbd13801df0f2e33a1d13c9..ae0e87702aaa4a6d18c5dded89329152faa28b9a 100644 (file)
@@ -1801,6 +1801,9 @@ const (
        OpLOONG64REVB2H
        OpLOONG64REVB2W
        OpLOONG64REVBV
+       OpLOONG64BITREV4B
+       OpLOONG64BITREVW
+       OpLOONG64BITREVV
        OpLOONG64FMINF
        OpLOONG64FMIND
        OpLOONG64FMAXF
@@ -24201,6 +24204,45 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "BITREV4B",
+               argLen: 1,
+               asm:    loong64.ABITREV4B,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
+       {
+               name:   "BITREVW",
+               argLen: 1,
+               asm:    loong64.ABITREVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
+       {
+               name:   "BITREVV",
+               argLen: 1,
+               asm:    loong64.ABITREVV,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
        {
                name:            "FMINF",
                argLen:          2,
index 88c5036b54174921b053254238596c2410b1efbf..779ec89134e9515c390ca0d199e76545fca41bc8 100644 (file)
@@ -125,6 +125,17 @@ func rewriteValueLOONG64(v *Value) bool {
                return rewriteValueLOONG64_OpBitLen32(v)
        case OpBitLen64:
                return rewriteValueLOONG64_OpBitLen64(v)
+       case OpBitRev16:
+               return rewriteValueLOONG64_OpBitRev16(v)
+       case OpBitRev32:
+               v.Op = OpLOONG64BITREVW
+               return true
+       case OpBitRev64:
+               v.Op = OpLOONG64BITREVV
+               return true
+       case OpBitRev8:
+               v.Op = OpLOONG64BITREV4B
+               return true
        case OpBswap16:
                v.Op = OpLOONG64REVB2H
                return true
@@ -975,6 +986,21 @@ func rewriteValueLOONG64_OpBitLen64(v *Value) bool {
                return true
        }
 }
+func rewriteValueLOONG64_OpBitRev16(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (BitRev16 <t> x)
+       // result: (REVB2H (BITREV4B <t> x))
+       for {
+               t := v.Type
+               x := v_0
+               v.reset(OpLOONG64REVB2H)
+               v0 := b.NewValue0(v.Pos, OpLOONG64BITREV4B, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValueLOONG64_OpCom16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
index 33345e9296ec6e9339636361278af58d36171cd0..db335ee8b38e823e131da20f850a6aa31caf6b1b 100644 (file)
@@ -946,27 +946,27 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0])
                },
-               sys.ARM64)
+               sys.ARM64, sys.Loong64)
        addF("math/bits", "Reverse32",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpBitRev32, types.Types[types.TINT], args[0])
                },
-               sys.ARM64)
+               sys.ARM64, sys.Loong64)
        addF("math/bits", "Reverse16",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpBitRev16, types.Types[types.TINT], args[0])
                },
-               sys.ARM64)
+               sys.ARM64, sys.Loong64)
        addF("math/bits", "Reverse8",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpBitRev8, types.Types[types.TINT], args[0])
                },
-               sys.ARM64)
+               sys.ARM64, sys.Loong64)
        addF("math/bits", "Reverse",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0])
                },
-               sys.ARM64)
+               sys.ARM64, sys.Loong64)
        addF("math/bits", "RotateLeft8",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpRotateLeft8, types.Types[types.TUINT8], args[0], args[1])
index a8656cc3d4bb2cff97d21851d3b4cd21a6a1867c..ca9e1c9e0ac7cfa45b753365b7da0e45e24b0c0b 100644 (file)
@@ -421,6 +421,11 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"loong64", "math/bits", "Len16"}:                          struct{}{},
        {"loong64", "math/bits", "Len32"}:                          struct{}{},
        {"loong64", "math/bits", "Len64"}:                          struct{}{},
+       {"loong64", "math/bits", "Reverse"}:                        struct{}{},
+       {"loong64", "math/bits", "Reverse8"}:                       struct{}{},
+       {"loong64", "math/bits", "Reverse16"}:                      struct{}{},
+       {"loong64", "math/bits", "Reverse32"}:                      struct{}{},
+       {"loong64", "math/bits", "Reverse64"}:                      struct{}{},
        {"loong64", "math/bits", "RotateLeft"}:                     struct{}{},
        {"loong64", "math/bits", "RotateLeft32"}:                   struct{}{},
        {"loong64", "math/bits", "RotateLeft64"}:                   struct{}{},
index 715f67a3c8b1a41f3541935ff54234cb6b60092b..a3d1143424b421dbf26273ba96c087f4ee30a9d0 100644 (file)
@@ -199,6 +199,35 @@ func OnesCount8(n uint8) int {
        return bits.OnesCount8(n)
 }
 
+// ------------------ //
+//    bits.Reverse    //
+// ------------------ //
+
+func Reverse(n uint) uint {
+       // loong64:"BITREVV"
+       return bits.Reverse(n)
+}
+
+func Reverse64(n uint64) uint64 {
+       // loong64:"BITREVV"
+       return bits.Reverse64(n)
+}
+
+func Reverse32(n uint32) uint32 {
+       // loong64:"BITREVW"
+       return bits.Reverse32(n)
+}
+
+func Reverse16(n uint16) uint16 {
+       // loong64:"BITREV4B","REVB2H"
+       return bits.Reverse16(n)
+}
+
+func Reverse8(n uint8) uint8 {
+       // loong64:"BITREV4B"
+       return bits.Reverse8(n)
+}
+
 // ----------------------- //
 //    bits.ReverseBytes    //
 // ----------------------- //