runtime: improve memmove on ppc64x/power10

author Archana R <aravind5@in.ibm.com>

Wed, 22 Feb 2023 11:52:15 +0000 (05:52 -0600)

committer Lynn Boger <laboger@linux.vnet.ibm.com>

Wed, 22 Mar 2023 11:36:20 +0000 (11:36 +0000)
author Archana R <aravind5@in.ibm.com>
Wed, 22 Feb 2023 11:52:15 +0000 (05:52 -0600)
committer Lynn Boger <laboger@linux.vnet.ibm.com>
Wed, 22 Mar 2023 11:36:20 +0000 (11:36 +0000)
diff --git a/src/runtime/memmove_ppc64x.s b/src/runtime/memmove_ppc64x.s

index 5fa51c0a4cf9d673c9f6d8775bec3bf8ec821150..dee59054b9010087b88ed939f5cbe69ca0b1364a 100644 (file)
--- a/src/runtime/memmove_ppc64x.s
+++ b/src/runtime/memmove_ppc64x.s
@@ -39,6 +39,15 @@ TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24
         // Determine if there are doublewords to
         // copy so a more efficient move can be done
  check:
+#ifdef GOPPC64_power10
+       CMP     LEN, $16
+       BGT     mcopy
+       SLD     $56, LEN, TMP
+       LXVL    SRC, TMP, V0
+       STXVL   V0, TGT, TMP
+       RET
+#endif
+mcopy:
         ANDCC   $7, LEN, BYTES  // R7: bytes to copy
         SRD     $3, LEN, DWORDS // R6: double words to copy
         MOVFL   CR0, CR3        // save CR from ANDCC
@@ -110,12 +119,26 @@ lt32gt8:
  lt16:  // Move 8 bytes if possible
         CMP     DWORDS, $1
         BLT     checkbytes
+#ifdef GOPPC64_power10
+       ADD     $8, BYTES
+       SLD     $56, BYTES, TMP
+       LXVL    SRC, TMP, V0
+       STXVL   V0, TGT, TMP
+       RET
+#endif
+
         MOVD    0(SRC), TMP
         ADD     $8, SRC
         MOVD    TMP, 0(TGT)
         ADD     $8, TGT
  checkbytes:
         BC      12, 14, LR              // BEQ lr
+#ifdef GOPPC64_power10
+       SLD     $56, BYTES, TMP
+       LXVL    SRC, TMP, V0
+       STXVL   V0, TGT, TMP
+       RET
+#endif
  lt8:   // Move word if possible
         CMP BYTES, $4
         BLT lt4
@@ -183,6 +206,7 @@ backward32setup:
         ANDCC   $3,DWORDS               // Compute remaining DWORDS and compare to 0
         MOVD    QWORDS, CTR             // set up loop ctr
         MOVD    $16, IDX16              // 32 bytes at a time
+       PCALIGN $32
  
  backward32loop:
         SUB     $32, TGT
author	Archana R <aravind5@in.ibm.com>
	Wed, 22 Feb 2023 11:52:15 +0000 (05:52 -0600)
committer	Lynn Boger <laboger@linux.vnet.ibm.com>
	Wed, 22 Mar 2023 11:36:20 +0000 (11:36 +0000)