From: Keith Randall
Date: Tue, 3 Jun 2025 20:06:25 +0000 (-0700)
Subject: cmd/compile: simplify zerorange on arm64
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=5e94d72158077411547287fc36f24b3121cca645;p=gostls13.git

cmd/compile: simplify zerorange on arm64

Get rid of the large zeroing cases. We only use this code for small
regions now.

Change-Id: Iba0a98785c5b4b72cf031763edb69ff741ca41af
Reviewed-on: https://go-review.googlesource.com/c/go/+/678936
Reviewed-by: Keith Randall
Auto-Submit: Keith Randall
Reviewed-by: Jorropo
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Mark Freeman
---

diff --git a/src/cmd/compile/internal/arm64/ggen.go b/src/cmd/compile/internal/arm64/ggen.go
index a681adcb7f..1402746700 100644
--- a/src/cmd/compile/internal/arm64/ggen.go
+++ b/src/cmd/compile/internal/arm64/ggen.go
@@ -5,9 +5,7 @@ package arm64
 
 import (
-	"cmd/compile/internal/ir"
 	"cmd/compile/internal/objw"
-	"cmd/compile/internal/types"
 	"cmd/internal/obj"
 	"cmd/internal/obj/arm64"
 )
@@ -22,47 +20,20 @@ func padframe(frame int64) int64 {
 }
 
 func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
-	if cnt == 0 {
-		return p
+	if cnt%8 != 0 {
+		panic("zeroed region not aligned")
 	}
-	if cnt < int64(4*types.PtrSize) {
-		for i := int64(0); i < cnt; i += int64(types.PtrSize) {
-			p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+off+i)
-		}
-	} else if cnt <= int64(128*types.PtrSize) {
-		if cnt%(2*int64(types.PtrSize)) != 0 {
-			p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+off)
-			off += int64(types.PtrSize)
-			cnt -= int64(types.PtrSize)
-		}
-		p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REG_R20, 0)
-		p = pp.Append(p, arm64.AADD, obj.TYPE_CONST, 0, 8+off, obj.TYPE_REG, arm64.REG_R20, 0)
-		p.Reg = arm64.REG_R20
-		p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
-		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = ir.Syms.Duffzero
-		p.To.Offset = 4 * (64 - cnt/(2*int64(types.PtrSize)))
-	} else {
-		// Not using REGTMP, so this is async preemptible (async preemption clobbers REGTMP).
-		// We are at the function entry, where no register is live, so it is okay to clobber
-		// other registers
-		const rtmp = arm64.REG_R20
-		p = pp.Append(p, arm64.AMOVD, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, rtmp, 0)
-		p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0)
-		p = pp.Append(p, arm64.AADD, obj.TYPE_REG, rtmp, 0, obj.TYPE_REG, arm64.REGRT1, 0)
-		p.Reg = arm64.REGRT1
-		p = pp.Append(p, arm64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, rtmp, 0)
-		p = pp.Append(p, arm64.AADD, obj.TYPE_REG, rtmp, 0, obj.TYPE_REG, arm64.REGRT2, 0)
-		p.Reg = arm64.REGRT1
-		p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGRT1, int64(types.PtrSize))
-		p.Scond = arm64.C_XPRE
-		p1 := p
-		p = pp.Append(p, arm64.ACMP, obj.TYPE_REG, arm64.REGRT1, 0, obj.TYPE_NONE, 0, 0)
-		p.Reg = arm64.REGRT2
-		p = pp.Append(p, arm64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0)
-		p.To.SetTarget(p1)
+	off += 8 // return address was ignored in offset calculation
+	for cnt >= 16 && off < 512 {
+		p = pp.Append(p, arm64.ASTP, obj.TYPE_REGREG, arm64.REGZERO, arm64.REGZERO, obj.TYPE_MEM, arm64.REGSP, off)
+		off += 16
+		cnt -= 16
+	}
+	for cnt != 0 {
+		p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, off)
+		off += 8
+		cnt -= 8
 	}
-
 	return p
 }
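
For illustration, not part of the CL: a minimal Go sketch that mirrors the control flow of the new zerorange, recording the stores it would emit as mnemonic strings instead of appending obj.Prog instructions. The zeroPlan helper and its output format are hypothetical; the off < 512 bound in the patch presumably keeps each STP offset within that instruction's scaled-immediate range.

package main

import "fmt"

// zeroPlan is a hypothetical helper mirroring the new zerorange logic:
// pair up 16-byte STP (ZR, ZR) stores while at least 16 bytes remain and
// the offset stays below 512, then finish with single 8-byte MOVD ZR
// stores. The mnemonic strings are illustrative only.
func zeroPlan(off, cnt int64) []string {
	if cnt%8 != 0 {
		panic("zeroed region not aligned")
	}
	var insns []string
	off += 8 // skip the return-address slot, as the patch does
	for cnt >= 16 && off < 512 {
		insns = append(insns, fmt.Sprintf("STP (ZR, ZR), %d(RSP)", off))
		off += 16
		cnt -= 16
	}
	for cnt != 0 {
		insns = append(insns, fmt.Sprintf("MOVD ZR, %d(RSP)", off))
		off += 8
		cnt -= 8
	}
	return insns
}

func main() {
	// A 40-byte region at frame offset 0 becomes two STPs and one MOVD.
	for _, insn := range zeroPlan(0, 40) {
		fmt.Println(insn)
	}
}

For example, zeroPlan(0, 40) yields pair-stores at offsets 8 and 24 plus a single store at offset 40: paired stores cover the bulk of a small region and a lone MOVD mops up the 8-byte tail.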