runtime: memclrNoHeapPointers optimization for block alignment

author Vasily Leonenko <vasiliy.leonenko@gmail.com>

Wed, 25 Sep 2024 20:01:21 +0000 (23:01 +0300)

committer Gopher Robot <gobot@golang.org>

Thu, 3 Oct 2024 21:06:16 +0000 (21:06 +0000)
author Vasily Leonenko <vasiliy.leonenko@gmail.com>
Wed, 25 Sep 2024 20:01:21 +0000 (23:01 +0300)
committer Gopher Robot <gobot@golang.org>
Thu, 3 Oct 2024 21:06:16 +0000 (21:06 +0000)
diff --git a/src/runtime/memclr_arm64.s b/src/runtime/memclr_arm64.s

index 1c35dfe0cf258bb3737fed3767d2460d7d686aaf..3e49f7fcf6a26fb2e211aebd141bd3977a9215cb 100644 (file)
--- a/src/runtime/memclr_arm64.s
+++ b/src/runtime/memclr_arm64.s
@@ -82,6 +82,7 @@ last16:
  last_end:
         RET
  
+       PCALIGN $16
  no_zva:
         SUB     $16, R0, R0
         SUB     $64, R1, R1
@@ -98,6 +99,7 @@ loop_64:
         BNE     tail63
         RET
  
+       PCALIGN $16
  try_zva:
         // Try using the ZVA feature to zero entire cache lines
         // It is not meaningful to use ZVA if the block size is less than 64,
@@ -124,6 +126,7 @@ try_zva:
         MOVW    R5, block_size<>(SB)
         B       no_zva
  
+       PCALIGN $16
  init:
         MOVW    $4, R9
         ANDW    $15, R3, R5
@@ -134,6 +137,7 @@ init:
         // Block size is less than 64.
         BNE     no_zva
  
+       PCALIGN $16
  zero_by_line:
         CMP     R5, R1
         // Not enough memory to reach alignment
@@ -170,6 +174,7 @@ loop_zva_prolog:
  aligned:
         SUB     R5, R1, R1
  
+       PCALIGN $16
  loop_zva:
         WORD    $0xd50b7420 // DC ZVA, R0
         ADD     R5, R0, R0
author	Vasily Leonenko <vasiliy.leonenko@gmail.com>
	Wed, 25 Sep 2024 20:01:21 +0000 (23:01 +0300)
committer	Gopher Robot <gobot@golang.org>
	Thu, 3 Oct 2024 21:06:16 +0000 (21:06 +0000)