Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: use generated loops instead of DUFFZERO on riscv64
author Meng Zhuo <mengzhuo@iscas.ac.cn>
Thu, 28 Aug 2025 07:05:27 +0000 (07:05 +0000)
committer Meng Zhuo <mengzhuo@iscas.ac.cn>
Wed, 10 Sep 2025 02:41:39 +0000 (19:41 -0700)
MemclrKnownSize112-4          5.602Gi ± 0%    5.601Gi ± 0%         ~ (p=0.363 n=10)
MemclrKnownSize128-4          6.933Gi ± 1%    6.545Gi ± 1%    -5.59% (p=0.000 n=10)
MemclrKnownSize192-4          8.055Gi ± 1%    7.804Gi ± 0%    -3.12% (p=0.000 n=10)
MemclrKnownSize248-4          8.489Gi ± 0%    8.718Gi ± 0%    +2.69% (p=0.000 n=10)
MemclrKnownSize256-4          8.762Gi ± 0%    8.763Gi ± 0%         ~ (p=0.494 n=10)
MemclrKnownSize512-4          9.514Gi ± 1%    9.514Gi ± 0%         ~ (p=0.529 n=10)
MemclrKnownSize1024-4         9.940Gi ± 0%    9.939Gi ± 1%         ~ (p=0.989 n=10)
ClearFat3-4                   1.300Gi ± 0%    1.301Gi ±  0%         ~ (p=0.447 n=10)
ClearFat4-4                   3.902Gi ± 0%    3.902Gi ±  0%         ~ (p=0.971 n=10)
ClearFat5-4                   665.8Mi ± 0%   1331.5Mi ±  0%  +100.01% (p=0.000 n=10)
ClearFat6-4                   665.8Mi ± 0%   1330.5Mi ±  0%   +99.82% (p=0.000 n=10)
ClearFat7-4                   490.7Mi ± 0%   1331.9Mi ±  0%  +171.45% (p=0.000 n=10)
ClearFat8-4                   5.201Gi ± 0%    5.202Gi ±  0%         ~ (p=0.123 n=10)
ClearFat9-4                   856.1Mi ± 0%   1331.6Mi ±  0%   +55.54% (p=0.000 n=10)
ClearFat10-4                  887.8Mi ± 0%   1331.9Mi ±  0%   +50.03% (p=0.000 n=10)
ClearFat11-4                  915.3Mi ± 0%   1331.1Mi ±  0%   +45.42% (p=0.000 n=10)
ClearFat12-4                  5.202Gi ± 0%    5.202Gi ±  0%         ~ (p=0.481 n=10)
ClearFat13-4                  961.5Mi ± 0%   1331.8Mi ±  0%   +38.50% (p=0.000 n=10)
ClearFat14-4                  981.0Mi ± 0%   1331.8Mi ±  0%   +35.76% (p=0.000 n=10)
ClearFat15-4                  951.3Mi ± 0%   1331.4Mi ±  0%   +39.96% (p=0.000 n=10)
ClearFat16-4                  1.600Gi ± 0%    5.202Gi ±  0%  +225.10% (p=0.000 n=10)
ClearFat18-4                  1.018Gi ± 0%    1.300Gi ±  0%   +27.77% (p=0.000 n=10)
ClearFat20-4                  2.601Gi ± 0%    4.938Gi ± 12%   +89.87% (p=0.000 n=10)
ClearFat24-4                  2.601Gi ± 0%    5.201Gi ±  0%   +99.96% (p=0.000 n=10)
ClearFat32-4                  1.982Gi ± 0%    5.203Gi ±  0%  +162.55% (p=0.000 n=10)
ClearFat40-4                  3.467Gi ± 0%    4.338Gi ±  0%   +25.11% (p=0.000 n=10)
ClearFat48-4                  3.671Gi ± 0%    5.201Gi ±  0%   +41.69% (p=0.000 n=10)
ClearFat56-4                  3.640Gi ± 0%    5.201Gi ±  0%   +42.88% (p=0.000 n=10)
ClearFat64-4                  2.250Gi ± 0%    5.202Gi ±  0%  +131.25% (p=0.000 n=10)
ClearFat72-4                  4.064Gi ± 0%    5.201Gi ±  0%   +27.97% (p=0.000 n=10)
ClearFat128-4                 4.496Gi ± 0%    5.203Gi ±  0%   +15.71% (p=0.000 n=10)
ClearFat256-4                 4.756Gi ± 0%    5.201Gi ±  0%    +9.36% (p=0.000 n=10)
ClearFat512-4                 2.512Gi ± 0%    5.201Gi ±  0%  +107.03% (p=0.000 n=10)
ClearFat1024-4                4.255Gi ± 0%    5.202Gi ±  0%   +22.26% (p=0.000 n=10)
ClearFat1032-4                4.260Gi ± 0%    5.201Gi ±  0%   +22.09% (p=0.000 n=10)
ClearFat1040-4                4.285Gi ± 1%    5.203Gi ±  0%   +21.41% (p=0.000 n=10)
geomean                       2.005Gi         3.020Gi         +50.58%

Change-Id: Iea1da734ff8eaf1b5a2822ae2bdb7f4fd9b65651
Reviewed-on: https://go-review.googlesource.com/c/go/+/699635
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
src/cmd/compile/internal/riscv64/ssa.go
src/cmd/compile/internal/ssa/_gen/RISCV64.rules
src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteRISCV64.go

index 88733b0d644a4b1de5f496eb96f20bdaadc43034..da281974907011e1dd372c43cdc468c6ba02f981 100644 (file)
@@ -181,6 +181,8 @@ func largestMove(alignment int64) (obj.As, int64) {
        }
 }
 
+var fracMovOps = []obj.As{riscv.AMOVB, riscv.AMOVH, riscv.AMOVW, riscv.AMOV}
+
 // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
 // RISC-V has no flags, so this is a no-op.
 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {}
@@ -738,30 +740,86 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.RegTo2 = riscv.REG_ZERO
 
        case ssa.OpRISCV64LoweredZero:
-               mov, sz := largestMove(v.AuxInt)
+               ptr := v.Args[0].Reg()
+               sc := v.AuxValAndOff()
+               n := sc.Val64()
+
+               mov, sz := largestMove(sc.Off64())
+
+               // mov  ZERO, (offset)(Rarg0)
+               var off int64
+               for n >= sz {
+                       zeroOp(s, mov, ptr, off)
+                       off += sz
+                       n -= sz
+               }
 
-               //      mov     ZERO, (Rarg0)
-               //      ADD     $sz, Rarg0
-               //      BGEU    Rarg1, Rarg0, -2(PC)
+               for i := len(fracMovOps) - 1; i >= 0; i-- {
+                       tsz := int64(1 << i)
+                       if n < tsz {
+                               continue
+                       }
+                       zeroOp(s, fracMovOps[i], ptr, off)
+                       off += tsz
+                       n -= tsz
+               }
 
-               p := s.Prog(mov)
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = riscv.REG_ZERO
-               p.To.Type = obj.TYPE_MEM
-               p.To.Reg = v.Args[0].Reg()
+       case ssa.OpRISCV64LoweredZeroLoop:
+               ptr := v.Args[0].Reg()
+               sc := v.AuxValAndOff()
+               n := sc.Val64()
+               mov, sz := largestMove(sc.Off64())
+               chunk := 8 * sz
+
+               if n <= 3*chunk {
+                       v.Fatalf("ZeroLoop too small:%d, expect:%d", n, 3*chunk)
+               }
+
+               tmp := v.RegTmp()
+
+               p := s.Prog(riscv.AADD)
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = n - n%chunk
+               p.Reg = ptr
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = tmp
+
+               for i := int64(0); i < 8; i++ {
+                       zeroOp(s, mov, ptr, sz*i)
+               }
 
                p2 := s.Prog(riscv.AADD)
                p2.From.Type = obj.TYPE_CONST
-               p2.From.Offset = sz
+               p2.From.Offset = chunk
                p2.To.Type = obj.TYPE_REG
-               p2.To.Reg = v.Args[0].Reg()
+               p2.To.Reg = ptr
 
-               p3 := s.Prog(riscv.ABGEU)
-               p3.To.Type = obj.TYPE_BRANCH
-               p3.Reg = v.Args[0].Reg()
+               p3 := s.Prog(riscv.ABNE)
+               p3.From.Reg = tmp
                p3.From.Type = obj.TYPE_REG
-               p3.From.Reg = v.Args[1].Reg()
-               p3.To.SetTarget(p)
+               p3.Reg = ptr
+               p3.To.Type = obj.TYPE_BRANCH
+               p3.To.SetTarget(p.Link)
+
+               n %= chunk
+
+               // mov  ZERO, (offset)(Rarg0)
+               var off int64
+               for n >= sz {
+                       zeroOp(s, mov, ptr, off)
+                       off += sz
+                       n -= sz
+               }
+
+               for i := len(fracMovOps) - 1; i >= 0; i-- {
+                       tsz := int64(1 << i)
+                       if n < tsz {
+                               continue
+                       }
+                       zeroOp(s, fracMovOps[i], ptr, off)
+                       off += tsz
+                       n -= tsz
+               }
 
        case ssa.OpRISCV64LoweredMove:
                mov, sz := largestMove(v.AuxInt)
@@ -955,3 +1013,13 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in
        p.Pos = p.Pos.WithNotStmt()
        return p
 }
+
+func zeroOp(s *ssagen.State, mov obj.As, reg int16, off int64) {
+       p := s.Prog(mov)
+       p.From.Type = obj.TYPE_REG
+       p.From.Reg = riscv.REG_ZERO
+       p.To.Type = obj.TYPE_MEM
+       p.To.Reg = reg
+       p.To.Offset = off
+       return
+}
index 821f822746ec5ae23c82f27c3252d3b403c3bfd7..93828777954118167857c6a3703964e771f23067 100644 (file)
        (MOVHstore [4] ptr (MOVDconst [0])
                (MOVHstore [2] ptr (MOVDconst [0])
                        (MOVHstore ptr (MOVDconst [0]) mem)))
-(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 =>
-       (MOVWstore [8] ptr (MOVDconst [0])
-               (MOVWstore [4] ptr (MOVDconst [0])
-                       (MOVWstore ptr (MOVDconst [0]) mem)))
-(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 =>
-       (MOVDstore [8] ptr (MOVDconst [0])
-               (MOVDstore ptr (MOVDconst [0]) mem))
-(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 =>
-       (MOVDstore [16] ptr (MOVDconst [0])
-               (MOVDstore [8] ptr (MOVDconst [0])
-                       (MOVDstore ptr (MOVDconst [0]) mem)))
-(Zero [32] {t} ptr mem) && t.Alignment()%8 == 0 =>
-       (MOVDstore [24] ptr (MOVDconst [0])
-               (MOVDstore [16] ptr (MOVDconst [0])
-                       (MOVDstore [8] ptr (MOVDconst [0])
-                               (MOVDstore ptr (MOVDconst [0]) mem))))
-
-// Medium 8-aligned zeroing uses a Duff's device
-// 8 and 128 are magic constants, see runtime/mkduff.go
-(Zero [s] {t} ptr mem)
-       && s%8 == 0 && s <= 8*128
-       && t.Alignment()%8 == 0 =>
-       (DUFFZERO [8 * (128 - s/8)] ptr mem)
+
+// Unroll zeroing for medium sizes (at most 192 bytes, i.e. 3 cache lines)
+(Zero [s] {t} ptr mem) && s <= 24*moveSize(t.Alignment(), config) =>
+       (LoweredZero [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
 
 // Generic zeroing uses a loop
-(Zero [s] {t} ptr mem) =>
-       (LoweredZero [t.Alignment()]
-               ptr
-               (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)]))
-               mem)
+(Zero [s] {t} ptr mem) && s > 24*moveSize(t.Alignment(), config) =>
+       (LoweredZeroLoop [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
 
 // Checks
 (IsNonNil ...) => (SNEZ ...)
index 0bccaf63bc4182cf9dc24d4d5b51c52f80f9f8c4..8e2f85b8d70f5bbafd71b345040d8f1443353634 100644 (file)
@@ -317,25 +317,40 @@ func init() {
 
                // Generic moves and zeros
 
-               // general unaligned zeroing
-               // arg0 = address of memory to zero (in X5, changed as side effect)
-               // arg1 = address of the last element to zero (inclusive)
-               // arg2 = mem
-               // auxint = element size
+               // general unrolled zeroing
+               // arg0 = address of memory to zero
+               // arg1 = mem
+               // auxint = size in bytes to zero and type alignment
                // returns mem
-               //      mov     ZERO, (X5)
-               //      ADD     $sz, X5
-               //      BGEU    Rarg1, X5, -2(PC)
+               //      mov     ZERO, (OFFSET)(Rarg0)
                {
-                       name:      "LoweredZero",
-                       aux:       "Int64",
-                       argLength: 3,
+                       name:           "LoweredZero",
+                       aux:            "SymValAndOff",
+                       typ:            "Mem",
+                       argLength:      2,
+                       symEffect:      "Write",
+                       faultOnNilArg0: true,
                        reg: regInfo{
-                               inputs:   []regMask{regNamed["X5"], gpMask},
-                               clobbers: regNamed["X5"],
+                               inputs: []regMask{gpMask},
                        },
+               },
+               // general zeroing using a loop
+               // arg0 = address of memory to zero (clobbered)
+               // arg1 = mem
+               // auxint = size in bytes to zero and type alignment
+               // returns mem
+               {
+                       name:           "LoweredZeroLoop",
+                       aux:            "SymValAndOff",
                        typ:            "Mem",
+                       argLength:      2,
+                       symEffect:      "Write",
+                       needIntTemp:    true,
                        faultOnNilArg0: true,
+                       reg: regInfo{
+                               inputs:       []regMask{gpMask},
+                               clobbersArg0: true,
+                       },
                },
 
                // general unaligned move
index 60ac188e1ed90c6e5daa3975f96254d2eb83747e..5f9572d6752386d0096f64fb2eadb04a449d68fe 100644 (file)
@@ -2569,6 +2569,7 @@ const (
        OpRISCV64DUFFZERO
        OpRISCV64DUFFCOPY
        OpRISCV64LoweredZero
+       OpRISCV64LoweredZeroLoop
        OpRISCV64LoweredMove
        OpRISCV64LoweredAtomicLoad8
        OpRISCV64LoweredAtomicLoad32
@@ -34558,15 +34559,28 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:           "LoweredZero",
-               auxType:        auxInt64,
-               argLen:         3,
+               auxType:        auxSymValAndOff,
+               argLen:         2,
                faultOnNilArg0: true,
+               symEffect:      SymWrite,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 16},         // X5
-                               {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                               {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                       },
+               },
+       },
+       {
+               name:           "LoweredZeroLoop",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               needIntTemp:    true,
+               faultOnNilArg0: true,
+               symEffect:      SymWrite,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
                        },
-                       clobbers: 16, // X5
+                       clobbersArg0: true,
                },
        },
        {
index e2c400b0c5e81172ef046d490cc52dc372128253..faa465b9db977a3e78a0947980428f47d2916c5a 100644 (file)
@@ -9925,138 +9925,39 @@ func rewriteValueRISCV64_OpZero(v *Value) bool {
                v.AddArg3(ptr, v0, v1)
                return true
        }
-       // match: (Zero [12] {t} ptr mem)
-       // cond: t.Alignment()%4 == 0
-       // result: (MOVWstore [8] ptr (MOVDconst [0]) (MOVWstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem)))
-       for {
-               if auxIntToInt64(v.AuxInt) != 12 {
-                       break
-               }
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mem := v_1
-               if !(t.Alignment()%4 == 0) {
-                       break
-               }
-               v.reset(OpRISCV64MOVWstore)
-               v.AuxInt = int32ToAuxInt(8)
-               v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem)
-               v1.AuxInt = int32ToAuxInt(4)
-               v2 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem)
-               v2.AddArg3(ptr, v0, mem)
-               v1.AddArg3(ptr, v0, v2)
-               v.AddArg3(ptr, v0, v1)
-               return true
-       }
-       // match: (Zero [16] {t} ptr mem)
-       // cond: t.Alignment()%8 == 0
-       // result: (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 16 {
-                       break
-               }
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mem := v_1
-               if !(t.Alignment()%8 == 0) {
-                       break
-               }
-               v.reset(OpRISCV64MOVDstore)
-               v.AuxInt = int32ToAuxInt(8)
-               v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-               v1.AddArg3(ptr, v0, mem)
-               v.AddArg3(ptr, v0, v1)
-               return true
-       }
-       // match: (Zero [24] {t} ptr mem)
-       // cond: t.Alignment()%8 == 0
-       // result: (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))
-       for {
-               if auxIntToInt64(v.AuxInt) != 24 {
-                       break
-               }
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mem := v_1
-               if !(t.Alignment()%8 == 0) {
-                       break
-               }
-               v.reset(OpRISCV64MOVDstore)
-               v.AuxInt = int32ToAuxInt(16)
-               v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-               v1.AuxInt = int32ToAuxInt(8)
-               v2 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-               v2.AddArg3(ptr, v0, mem)
-               v1.AddArg3(ptr, v0, v2)
-               v.AddArg3(ptr, v0, v1)
-               return true
-       }
-       // match: (Zero [32] {t} ptr mem)
-       // cond: t.Alignment()%8 == 0
-       // result: (MOVDstore [24] ptr (MOVDconst [0]) (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))))
-       for {
-               if auxIntToInt64(v.AuxInt) != 32 {
-                       break
-               }
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mem := v_1
-               if !(t.Alignment()%8 == 0) {
-                       break
-               }
-               v.reset(OpRISCV64MOVDstore)
-               v.AuxInt = int32ToAuxInt(24)
-               v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-               v1.AuxInt = int32ToAuxInt(16)
-               v2 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-               v2.AuxInt = int32ToAuxInt(8)
-               v3 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
-               v3.AddArg3(ptr, v0, mem)
-               v2.AddArg3(ptr, v0, v3)
-               v1.AddArg3(ptr, v0, v2)
-               v.AddArg3(ptr, v0, v1)
-               return true
-       }
        // match: (Zero [s] {t} ptr mem)
-       // cond: s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0
-       // result: (DUFFZERO [8 * (128 - s/8)] ptr mem)
+       // cond: s <= 24*moveSize(t.Alignment(), config)
+       // result: (LoweredZero [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
        for {
                s := auxIntToInt64(v.AuxInt)
                t := auxToType(v.Aux)
                ptr := v_0
                mem := v_1
-               if !(s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0) {
+               if !(s <= 24*moveSize(t.Alignment(), config)) {
                        break
                }
-               v.reset(OpRISCV64DUFFZERO)
-               v.AuxInt = int64ToAuxInt(8 * (128 - s/8))
+               v.reset(OpRISCV64LoweredZero)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(s), int32(t.Alignment())))
                v.AddArg2(ptr, mem)
                return true
        }
        // match: (Zero [s] {t} ptr mem)
-       // result: (LoweredZero [t.Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)])) mem)
+       // cond: s > 24*moveSize(t.Alignment(), config)
+       // result: (LoweredZeroLoop [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
        for {
                s := auxIntToInt64(v.AuxInt)
                t := auxToType(v.Aux)
                ptr := v_0
                mem := v_1
-               v.reset(OpRISCV64LoweredZero)
-               v.AuxInt = int64ToAuxInt(t.Alignment())
-               v0 := b.NewValue0(v.Pos, OpRISCV64ADD, ptr.Type)
-               v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
-               v1.AuxInt = int64ToAuxInt(s - moveSize(t.Alignment(), config))
-               v0.AddArg2(ptr, v1)
-               v.AddArg3(ptr, v0, mem)
+               if !(s > 24*moveSize(t.Alignment(), config)) {
+                       break
+               }
+               v.reset(OpRISCV64LoweredZeroLoop)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(s), int32(t.Alignment())))
+               v.AddArg2(ptr, mem)
                return true
        }
+       return false
 }
 func rewriteBlockRISCV64(b *Block) bool {
        typ := &b.Func.Config.Types