From: fanzha02 Date: Fri, 20 Apr 2018 06:50:29 +0000 (+0000) Subject: cmd/internal/obj/arm64: reorder the assembler's optab entries X-Git-Tag: go1.11beta1~637 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=d7f5c0360f1c305a7de6b3f7c648fe657323425f;p=gostls13.git cmd/internal/obj/arm64: reorder the assembler's optab entries Current optab entries are unordered, because the new instructions are added at the end of the optab. The patch reorders them by comments in optab, such as arithmetic operations, logical operations and a series of load/store etc. The patch removes the VMOVS opcode because FMOVS already has the same operation. Change-Id: Iccdf89ecbb3875b9dfcb6e06be2cc19c7e5581a2 Reviewed-on: https://go-review.googlesource.com/109896 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index c61d22051d..9a2e20acc0 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -226,12 +226,15 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VST1.P V4.D[1], 8(R0) // 04849f4d VST1.P V4.D[1], (R0)(R1) // VST1.P V4.D[1], (R0)(R1*1) // 0484814d VST1 V4.D[1], (R0) // 0484004d - VMOVS V20, (R0) // 140000bd - VMOVS.P V20, 4(R0) // 144400bc - VMOVS.W V20, 4(R0) // 144c00bc - VMOVS (R0), V20 // 140040bd - VMOVS.P 8(R0), V20 // 148440bc - VMOVS.W 8(R0), V20 // 148c40bc + FMOVS F20, (R0) // 140000bd + FMOVS.P F20, 4(R0) // 144400bc + FMOVS.W F20, 4(R0) // 144c00bc + FMOVS (R0), F20 // 140040bd + FMOVS.P 8(R0), F20 // 148440bc + FMOVS.W 8(R0), F20 // 148c40bc + FMOVD F20, (R2) // 540000fd + FMOVD.P F20, 8(R1) // 348400fc + FMOVD.W 8(R1), F20 // 348c40fc PRFM (R2), PLDL1KEEP // 400080f9 PRFM 16(R2), PLDL1KEEP // 400880f9 PRFM 48(R6), PSTL2STRM // d31880f9 diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index af57dfefed..1e92a7d5af 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -884,7 +884,6 @@ const ( AVREV64 AVST1 AVDUP - AVMOVS AVADDV AVMOVI AVUADDLV diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index aee409e94b..e3ba4a00b7 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -386,7 +386,6 @@ var Anames = []string{ "VREV64", "VST1", "VDUP", - "VMOVS", "VADDV", "VMOVI", "VUADDLV", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 1db61d1a45..078b319a3d 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -207,6 +207,26 @@ var optab = []Optab{ {ACMP, C_EXTREG, C_RSP, C_NONE, 27, 4, 0, 0, 0}, {AADD, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0}, {AADD, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, + {AMUL, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0}, + {AMUL, C_REG, C_NONE, C_REG, 15, 4, 0, 0, 0}, + {AMADD, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0}, + {AREM, C_REG, C_REG, C_REG, 16, 8, 0, 0, 0}, + {AREM, C_REG, C_NONE, C_REG, 16, 8, 0, 0, 0}, + {ASDIV, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, + {ASDIV, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0}, + + {AFADDS, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFADDS, C_FREG, C_FREG, C_FREG, 54, 4, 0, 0, 0}, + {AFADDS, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFADDS, C_FCON, C_FREG, C_FREG, 54, 4, 0, 0, 0}, + {AFMSUBD, C_FREG, C_FREG, C_FREG, 15, 4, 0, 0, 0}, + {AFCMPS, C_FREG, C_FREG, C_NONE, 56, 4, 0, 0, 0}, + {AFCMPS, C_FCON, C_FREG, C_NONE, 56, 4, 0, 0, 0}, + {AVADDP, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0}, + {AVADD, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0}, + {AVADD, C_VREG, C_VREG, C_VREG, 89, 4, 0, 0, 0}, + {AVADD, C_VREG, C_NONE, C_VREG, 89, 4, 0, 0, 0}, + {AVADDV, C_ARNG, C_NONE, C_VREG, 85, 4, 0, 0, 0}, /* logical operations */ {AAND, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0}, @@ -259,8 +279,7 @@ var optab = []Optab{ {AMOVK, C_VCON, C_NONE, C_REG, 33, 4, 0, 0, 0}, {AMOVD, C_AACON, C_NONE, C_REG, 4, 4, REGFROM, 0, 0}, - {ASDIV, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, - {ASDIV, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0}, + {AB, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, {ABL, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, {AB, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, @@ -271,6 +290,12 @@ var optab = []Optab{ {obj.ARET, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, {AADRP, C_SBRA, C_NONE, C_REG, 60, 4, 0, 0, 0}, {AADR, C_SBRA, C_NONE, C_REG, 61, 4, 0, 0, 0}, + {ACBZ, C_REG, C_NONE, C_SBRA, 39, 4, 0, 0, 0}, + {ATBZ, C_VCON, C_REG, C_SBRA, 40, 4, 0, 0, 0}, + + {AERET, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0}, + {ACLREX, C_NONE, C_NONE, C_VCON, 38, 4, 0, 0, 0}, + {ACLREX, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0, 0}, {ABFM, C_VCON, C_REG, C_REG, 42, 4, 0, 0, 0}, {ABFI, C_VCON, C_REG, C_REG, 43, 4, 0, 0, 0}, {AEXTR, C_VCON, C_REG, C_REG, 44, 4, 0, 0, 0}, @@ -307,86 +332,113 @@ var optab = []Optab{ {AMOVD, C_GOTADDR, C_NONE, C_REG, 71, 8, 0, 0, 0}, {AMOVD, C_TLS_LE, C_NONE, C_REG, 69, 4, 0, 0, 0}, {AMOVD, C_TLS_IE, C_NONE, C_REG, 70, 8, 0, 0, 0}, - {AMUL, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0}, - {AMUL, C_REG, C_NONE, C_REG, 15, 4, 0, 0, 0}, - {AMADD, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0}, - {AREM, C_REG, C_REG, C_REG, 16, 8, 0, 0, 0}, - {AREM, C_REG, C_NONE, C_REG, 16, 8, 0, 0, 0}, + + {AFMOVS, C_FREG, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AFMOVS, C_ADDR, C_NONE, C_FREG, 65, 12, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, + {AFMOVD, C_ADDR, C_NONE, C_FREG, 65, 12, 0, 0, 0}, + {AFMOVS, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMOVD, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0}, + {AFMOVS, C_REG, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_REG, 29, 4, 0, 0, 0}, + {AFMOVD, C_REG, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_REG, 29, 4, 0, 0, 0}, + {AFCVTZSD, C_FREG, C_NONE, C_REG, 29, 4, 0, 0, 0}, + {ASCVTFD, C_REG, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AFCVTSD, C_FREG, C_NONE, C_FREG, 29, 4, 0, 0, 0}, + {AVCNT, C_ARNG, C_NONE, C_ARNG, 29, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_REG, 73, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_ELEM, 92, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_VREG, 80, 4, 0, 0, 0}, + {AVMOV, C_REG, C_NONE, C_ARNG, 82, 4, 0, 0, 0}, + {AVMOV, C_REG, C_NONE, C_ELEM, 78, 4, 0, 0, 0}, + {AVMOV, C_ARNG, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, + {AVDUP, C_ELEM, C_NONE, C_ARNG, 79, 4, 0, 0, 0}, + {AVMOVI, C_ADDCON, C_NONE, C_ARNG, 86, 4, 0, 0, 0}, + {AVFMLA, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0}, + {AVEXT, C_VCON, C_ARNG, C_ARNG, 94, 4, 0, 0, 0}, + {AVUSHR, C_VCON, C_ARNG, C_ARNG, 95, 4, 0, 0, 0}, + + /* conditional operations */ {ACSEL, C_COND, C_REG, C_REG, 18, 4, 0, 0, 0}, /* from3 optional */ {ACSET, C_COND, C_NONE, C_REG, 18, 4, 0, 0, 0}, + {AFCSELD, C_COND, C_FREG, C_FREG, 18, 4, 0, 0, 0}, {ACCMN, C_COND, C_REG, C_VCON, 19, 4, 0, 0, 0}, /* from3 either C_REG or C_VCON */ - {APRFM, C_UOREG32K, C_NONE, C_SPR, 91, 4, 0, 0, 0}, - {APRFM, C_UOREG32K, C_NONE, C_LCON, 91, 4, 0, 0, 0}, + {AFCCMPS, C_COND, C_FREG, C_VCON, 57, 4, 0, 0, 0}, + + /* SWPD/SWPW/SWPH/SWPB */ + {ASWPD, C_ZAUTO, C_REG, C_REG, 47, 4, REGSP, 0, 0}, + {ASWPD, C_ZOREG, C_REG, C_REG, 47, 4, 0, 0, 0}, /* scaled 12-bit unsigned displacement store */ {AMOVB, C_REG, C_NONE, C_UAUTO4K, 20, 4, REGSP, 0, 0}, {AMOVB, C_REG, C_NONE, C_UOREG4K, 20, 4, 0, 0, 0}, {AMOVBU, C_REG, C_NONE, C_UAUTO4K, 20, 4, REGSP, 0, 0}, {AMOVBU, C_REG, C_NONE, C_UOREG4K, 20, 4, 0, 0, 0}, - {AMOVH, C_REG, C_NONE, C_UAUTO8K, 20, 4, REGSP, 0, 0}, - {AMOVH, C_REG, C_NONE, C_ZOREG, 20, 4, 0, 0, 0}, {AMOVH, C_REG, C_NONE, C_UOREG8K, 20, 4, 0, 0, 0}, - {AMOVW, C_REG, C_NONE, C_UAUTO16K, 20, 4, REGSP, 0, 0}, - {AMOVW, C_REG, C_NONE, C_ZOREG, 20, 4, 0, 0, 0}, {AMOVW, C_REG, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, + {AMOVD, C_REG, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, + {AMOVD, C_REG, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, - {AVMOVS, C_VREG, C_NONE, C_UAUTO16K, 20, 4, REGSP, 0, 0}, - {AVMOVS, C_VREG, C_NONE, C_ZOREG, 20, 4, 0, 0, 0}, - {AVMOVS, C_VREG, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_UAUTO16K, 20, 4, REGSP, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, /* unscaled 9-bit signed displacement store */ {AMOVB, C_REG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, {AMOVB, C_REG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, {AMOVBU, C_REG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, {AMOVBU, C_REG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, - {AMOVH, C_REG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, {AMOVH, C_REG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, {AMOVW, C_REG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, {AMOVW, C_REG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, - - {AMOVD, C_REG, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, - {AMOVD, C_REG, C_NONE, C_ZOREG, 20, 4, 0, 0, 0}, - {AMOVD, C_REG, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, {AMOVD, C_REG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, {AMOVD, C_REG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, - /* short displacement load */ + {AFMOVS, C_FREG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVS, C_FREG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVD, C_FREG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + + /* scaled 12-bit unsigned displacement load */ {AMOVB, C_UAUTO4K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVB, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVB, C_ZOREG, C_NONE, C_REG, 21, 4, 0, 0, 0}, {AMOVB, C_UOREG4K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVB, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVBU, C_UAUTO4K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVBU, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVBU, C_ZOREG, C_NONE, C_REG, 21, 4, 0, 0, 0}, {AMOVBU, C_UOREG4K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVBU, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVH, C_UAUTO8K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVH, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVH, C_ZOREG, C_NONE, C_REG, 21, 4, 0, 0, 0}, {AMOVH, C_UOREG8K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVH, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVW, C_UAUTO16K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVW, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVW, C_ZOREG, C_NONE, C_REG, 21, 4, 0, 0, 0}, {AMOVW, C_UOREG16K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVW, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVD, C_UAUTO32K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVD, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AMOVD, C_ZOREG, C_NONE, C_REG, 21, 4, 0, 0, 0}, {AMOVD, C_UOREG32K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + + {AFMOVS, C_UAUTO16K, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVS, C_UOREG16K, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVD, C_UAUTO32K, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVD, C_UOREG32K, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + + /* unscaled 9-bit signed displacement load */ + {AMOVB, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + {AMOVB, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + {AMOVBU, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + {AMOVBU, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + {AMOVH, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + {AMOVH, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + {AMOVW, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + {AMOVW, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + {AMOVD, C_NSAUTO, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, {AMOVD, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, - {AVMOVS, C_UAUTO16K, C_NONE, C_VREG, 21, 4, REGSP, 0, 0}, - {AVMOVS, C_ZOREG, C_NONE, C_VREG, 21, 4, 0, 0, 0}, - {AVMOVS, C_UOREG16K, C_NONE, C_VREG, 21, 4, 0, 0, 0}, + {AFMOVS, C_NSAUTO, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVS, C_NSOREG, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVD, C_NSAUTO, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVD, C_NSOREG, C_NONE, C_FREG, 21, 4, 0, 0, 0}, /* long displacement store */ {AMOVB, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, @@ -400,6 +452,11 @@ var optab = []Optab{ {AMOVD, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, {AMOVD, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AFMOVS, C_FREG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVS, C_FREG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AFMOVD, C_FREG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVD, C_FREG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + /* long displacement load */ {AMOVB, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0}, {AMOVB, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0}, @@ -412,6 +469,11 @@ var optab = []Optab{ {AMOVD, C_LAUTO, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0}, {AMOVD, C_LOREG, C_NONE, C_REG, 31, 8, 0, LFROM, 0}, + {AFMOVS, C_LAUTO, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVS, C_LOREG, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + {AFMOVD, C_LAUTO, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVD, C_LOREG, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + /* load long effective stack address (load int32 offset and add) */ {AMOVD, C_LACON, C_NONE, C_REG, 34, 8, REGSP, LFROM, 0}, @@ -423,7 +485,7 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, {AFMOVS, C_LOREG, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, {AFMOVD, C_LOREG, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, - {AVMOVS, C_LOREG, C_NONE, C_VREG, 22, 4, 0, 0, C_XPOST}, + {AMOVD, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AMOVW, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AMOVH, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, @@ -431,7 +493,6 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AFMOVS, C_LOREG, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, {AFMOVD, C_LOREG, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, - {AVMOVS, C_LOREG, C_NONE, C_VREG, 22, 4, 0, 0, C_XPRE}, /* pre/post-indexed store (unscaled, signed 9-bit offset) */ {AMOVD, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, @@ -441,7 +502,7 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVS, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVD, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, - {AVMOVS, C_VREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AMOVD, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AMOVW, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AMOVH, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, @@ -449,7 +510,6 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVS, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVD, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, - {AVMOVS, C_VREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, /* load with shifted or extended register offset */ {AMOVD, C_ROFF, C_NONE, C_REG, 98, 4, 0, 0, 0}, @@ -464,10 +524,6 @@ var optab = []Optab{ {AMOVH, C_REG, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, {AMOVB, C_REG, C_NONE, C_ROFF, 99, 4, 0, 0, 0}, - /* SWPD/SWPW/SWPH/SWPB */ - {ASWPD, C_ZAUTO, C_REG, C_REG, 47, 4, REGSP, 0, 0}, - {ASWPD, C_ZOREG, C_REG, C_REG, 47, 4, 0, 0, 0}, - /* pre/post-indexed/signed-offset load/store register pair (unscaled, signed 10-bit quad-aligned and long offset) */ {ALDP, C_NPAUTO, C_NONE, C_PAIR, 66, 4, REGSP, 0, 0}, @@ -599,75 +655,6 @@ var optab = []Optab{ {ASTPW, C_PAIR, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPOST}, {ASTPW, C_PAIR, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, - /* special */ - {AMOVD, C_SPR, C_NONE, C_REG, 35, 4, 0, 0, 0}, - {AMRS, C_SPR, C_NONE, C_REG, 35, 4, 0, 0, 0}, - {AMOVD, C_REG, C_NONE, C_SPR, 36, 4, 0, 0, 0}, - {AMSR, C_REG, C_NONE, C_SPR, 36, 4, 0, 0, 0}, - {AMOVD, C_VCON, C_NONE, C_SPR, 37, 4, 0, 0, 0}, - {AMSR, C_VCON, C_NONE, C_SPR, 37, 4, 0, 0, 0}, - {AERET, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0}, - {AFMOVS, C_FREG, C_NONE, C_UAUTO16K, 20, 4, REGSP, 0, 0}, - {AFMOVS, C_FREG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, - {AFMOVS, C_FREG, C_NONE, C_ZOREG, 20, 4, 0, 0, 0}, - {AFMOVS, C_FREG, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, - {AFMOVS, C_FREG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, - {AFMOVD, C_FREG, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, - {AFMOVD, C_FREG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, - {AFMOVD, C_FREG, C_NONE, C_ZOREG, 20, 4, 0, 0, 0}, - {AFMOVD, C_FREG, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, - {AFMOVD, C_FREG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, - {AFMOVS, C_UAUTO16K, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, - {AFMOVS, C_NSAUTO, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, - {AFMOVS, C_ZOREG, C_NONE, C_FREG, 21, 4, 0, 0, 0}, - {AFMOVS, C_UOREG16K, C_NONE, C_FREG, 21, 4, 0, 0, 0}, - {AFMOVS, C_NSOREG, C_NONE, C_FREG, 21, 4, 0, 0, 0}, - {AFMOVD, C_UAUTO32K, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, - {AFMOVD, C_NSAUTO, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, - {AFMOVD, C_ZOREG, C_NONE, C_FREG, 21, 4, 0, 0, 0}, - {AFMOVD, C_UOREG32K, C_NONE, C_FREG, 21, 4, 0, 0, 0}, - {AFMOVD, C_NSOREG, C_NONE, C_FREG, 21, 4, 0, 0, 0}, - {AFMOVS, C_FREG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, - {AFMOVS, C_FREG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, - {AFMOVD, C_FREG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, - {AFMOVD, C_FREG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, - {AFMOVS, C_LAUTO, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, - {AFMOVS, C_LOREG, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, - {AFMOVD, C_LAUTO, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, - {AFMOVD, C_LOREG, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, - {AFMOVS, C_FREG, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, - {AFMOVS, C_ADDR, C_NONE, C_FREG, 65, 12, 0, 0, 0}, - {AFMOVD, C_FREG, C_NONE, C_ADDR, 64, 12, 0, 0, 0}, - {AFMOVD, C_ADDR, C_NONE, C_FREG, 65, 12, 0, 0, 0}, - {AFADDS, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0}, - {AFADDS, C_FREG, C_FREG, C_FREG, 54, 4, 0, 0, 0}, - {AFADDS, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0}, - {AFADDS, C_FCON, C_FREG, C_FREG, 54, 4, 0, 0, 0}, - {AFMSUBD, C_FREG, C_FREG, C_FREG, 15, 4, 0, 0, 0}, - {AFMOVS, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0}, - {AFMOVS, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0}, - {AFMOVD, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0}, - {AFMOVD, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0}, - {AFCVTZSD, C_FREG, C_NONE, C_REG, 29, 4, 0, 0, 0}, - {ASCVTFD, C_REG, C_NONE, C_FREG, 29, 4, 0, 0, 0}, - {AFMOVS, C_REG, C_NONE, C_FREG, 29, 4, 0, 0, 0}, - {AFMOVS, C_FREG, C_NONE, C_REG, 29, 4, 0, 0, 0}, - {AFMOVD, C_REG, C_NONE, C_FREG, 29, 4, 0, 0, 0}, - {AFMOVD, C_FREG, C_NONE, C_REG, 29, 4, 0, 0, 0}, - {AFCMPS, C_FREG, C_FREG, C_NONE, 56, 4, 0, 0, 0}, - {AFCMPS, C_FCON, C_FREG, C_NONE, 56, 4, 0, 0, 0}, - {AFCCMPS, C_COND, C_FREG, C_VCON, 57, 4, 0, 0, 0}, - {AFCSELD, C_COND, C_FREG, C_FREG, 18, 4, 0, 0, 0}, - {AFCVTSD, C_FREG, C_NONE, C_FREG, 29, 4, 0, 0, 0}, - {ACLREX, C_NONE, C_NONE, C_VCON, 38, 4, 0, 0, 0}, - {ACLREX, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0, 0}, - {ACBZ, C_REG, C_NONE, C_SBRA, 39, 4, 0, 0, 0}, - {ATBZ, C_VCON, C_REG, C_SBRA, 40, 4, 0, 0, 0}, - {ASYS, C_VCON, C_NONE, C_NONE, 50, 4, 0, 0, 0}, - {ASYS, C_VCON, C_REG, C_NONE, 50, 4, 0, 0, 0}, - {ASYSL, C_VCON, C_NONE, C_REG, 50, 4, 0, 0, 0}, - {ADMB, C_VCON, C_NONE, C_NONE, 51, 4, 0, 0, 0}, - {AHINT, C_VCON, C_NONE, C_NONE, 52, 4, 0, 0, 0}, {ALDAR, C_ZOREG, C_NONE, C_REG, 58, 4, 0, 0, 0}, {ALDXR, C_ZOREG, C_NONE, C_REG, 58, 4, 0, 0, 0}, {ALDAXR, C_ZOREG, C_NONE, C_REG, 58, 4, 0, 0, 0}, @@ -676,44 +663,46 @@ var optab = []Optab{ {ASTXR, C_REG, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, // to3=C_REG {ASTLXR, C_REG, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, // to3=C_REG {ASTXP, C_PAIR, C_NONE, C_ZOREG, 59, 4, 0, 0, 0}, - {AAESD, C_VREG, C_NONE, C_VREG, 29, 4, 0, 0, 0}, // for compatibility with old code - {AAESD, C_ARNG, C_NONE, C_ARNG, 29, 4, 0, 0, 0}, // recommend using the new one for better readability - {ASHA1C, C_VREG, C_REG, C_VREG, 1, 4, 0, 0, 0}, - {ASHA1C, C_ARNG, C_VREG, C_VREG, 1, 4, 0, 0, 0}, - {ASHA1H, C_VREG, C_NONE, C_VREG, 29, 4, 0, 0, 0}, - {ASHA1SU0, C_ARNG, C_ARNG, C_ARNG, 1, 4, 0, 0, 0}, - {ASHA256H, C_ARNG, C_VREG, C_VREG, 1, 4, 0, 0, 0}, - {AVADDP, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0}, - {AVADD, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0}, - {AVADD, C_VREG, C_VREG, C_VREG, 89, 4, 0, 0, 0}, - {AVADD, C_VREG, C_NONE, C_VREG, 89, 4, 0, 0, 0}, + + /* VLD1/VST1 */ {AVLD1, C_ZOREG, C_NONE, C_LIST, 81, 4, 0, 0, 0}, {AVLD1, C_LOREG, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVLD1, C_ROFF, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVLD1, C_LOREG, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, {AVLD1, C_ROFF, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, {AVLD1, C_LOREG, C_NONE, C_ELEM, 97, 4, 0, 0, 0}, - {AVMOV, C_ELEM, C_NONE, C_REG, 73, 4, 0, 0, 0}, - {AVMOV, C_REG, C_NONE, C_ARNG, 82, 4, 0, 0, 0}, - {AVMOV, C_ELEM, C_NONE, C_ELEM, 92, 4, 0, 0, 0}, - {AVMOV, C_ARNG, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, - {AVMOV, C_REG, C_NONE, C_ELEM, 78, 4, 0, 0, 0}, - {AVMOV, C_ELEM, C_NONE, C_VREG, 80, 4, 0, 0, 0}, - {AVREV32, C_ARNG, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, {AVST1, C_LIST, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, {AVST1, C_LIST, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, {AVST1, C_LIST, C_NONE, C_ROFF, 84, 4, 0, 0, C_XPOST}, {AVST1, C_ELEM, C_NONE, C_LOREG, 96, 4, 0, 0, C_XPOST}, {AVST1, C_ELEM, C_NONE, C_ROFF, 96, 4, 0, 0, C_XPOST}, {AVST1, C_ELEM, C_NONE, C_LOREG, 96, 4, 0, 0, 0}, - {AVDUP, C_ELEM, C_NONE, C_ARNG, 79, 4, 0, 0, 0}, - {AVADDV, C_ARNG, C_NONE, C_VREG, 85, 4, 0, 0, 0}, - {AVCNT, C_ARNG, C_NONE, C_ARNG, 29, 4, 0, 0, 0}, - {AVMOVI, C_ADDCON, C_NONE, C_ARNG, 86, 4, 0, 0, 0}, - {AVFMLA, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0}, + + /* special */ + {AMOVD, C_SPR, C_NONE, C_REG, 35, 4, 0, 0, 0}, + {AMRS, C_SPR, C_NONE, C_REG, 35, 4, 0, 0, 0}, + {AMOVD, C_REG, C_NONE, C_SPR, 36, 4, 0, 0, 0}, + {AMSR, C_REG, C_NONE, C_SPR, 36, 4, 0, 0, 0}, + {AMOVD, C_VCON, C_NONE, C_SPR, 37, 4, 0, 0, 0}, + {AMSR, C_VCON, C_NONE, C_SPR, 37, 4, 0, 0, 0}, + {APRFM, C_UOREG32K, C_NONE, C_SPR, 91, 4, 0, 0, 0}, + {APRFM, C_UOREG32K, C_NONE, C_LCON, 91, 4, 0, 0, 0}, + {ADMB, C_VCON, C_NONE, C_NONE, 51, 4, 0, 0, 0}, + {AHINT, C_VCON, C_NONE, C_NONE, 52, 4, 0, 0, 0}, + {ASYS, C_VCON, C_NONE, C_NONE, 50, 4, 0, 0, 0}, + {ASYS, C_VCON, C_REG, C_NONE, 50, 4, 0, 0, 0}, + {ASYSL, C_VCON, C_NONE, C_REG, 50, 4, 0, 0, 0}, + + /* encryption instructions */ + {AAESD, C_VREG, C_NONE, C_VREG, 29, 4, 0, 0, 0}, // for compatibility with old code + {AAESD, C_ARNG, C_NONE, C_ARNG, 29, 4, 0, 0, 0}, // recommend using the new one for better readability + {ASHA1C, C_VREG, C_REG, C_VREG, 1, 4, 0, 0, 0}, + {ASHA1C, C_ARNG, C_VREG, C_VREG, 1, 4, 0, 0, 0}, + {ASHA1H, C_VREG, C_NONE, C_VREG, 29, 4, 0, 0, 0}, + {ASHA1SU0, C_ARNG, C_ARNG, C_ARNG, 1, 4, 0, 0, 0}, + {ASHA256H, C_ARNG, C_VREG, C_VREG, 1, 4, 0, 0, 0}, + {AVREV32, C_ARNG, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, {AVPMULL, C_ARNG, C_ARNG, C_ARNG, 93, 4, 0, 0, 0}, - {AVEXT, C_VCON, C_ARNG, C_ARNG, 94, 4, 0, 0, 0}, - {AVUSHR, C_VCON, C_ARNG, C_ARNG, 95, 4, 0, 0, 0}, {obj.AUNDEF, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, {obj.APCDATA, C_VCON, C_NONE, C_VCON, 0, 0, 0, 0, 0}, @@ -2398,7 +2387,6 @@ func buildop(ctxt *obj.Link) { AVLD1, AVST1, AVDUP, - AVMOVS, AVMOVI, APRFM, AVEXT: @@ -5744,9 +5732,6 @@ func (c *ctxt7) opldr12(p *obj.Prog, a obj.As) uint32 { case AFMOVD: return LDSTR12U(3, 1, 1) - - case AVMOVS: - return LDSTR12U(2, 1, 1) } c.ctxt.Diag("bad opldr12 %v\n%v", a, p) @@ -5831,9 +5816,12 @@ func (c *ctxt7) opldrpp(p *obj.Prog, a obj.As) uint32 { case AMOVBU: return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 - case AVMOVS: + case AFMOVS: return 2<<30 | 7<<27 | 1<<26 | 0<<24 | 1<<22 + case AFMOVD: + return 3<<30 | 7<<27 | 1<<26 | 0<<24 | 1<<22 + case APRFM: return 0xf9<<24 | 2<<22 @@ -6112,7 +6100,7 @@ func movesize(a obj.As) int { case AMOVD: return 3 - case AMOVW, AMOVWU, AVMOVS: + case AMOVW, AMOVWU: return 2 case AMOVH, AMOVHU: diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go index d98b1b6f9e..845fb22817 100644 --- a/src/cmd/internal/obj/arm64/doc.go +++ b/src/cmd/internal/obj/arm64/doc.go @@ -97,6 +97,8 @@ such as str, stur, strb, sturb, strh, sturh stlr, stlrb. stlrh, st1. MADD R2, R30, R22, R6 <=> madd x6, x22, x2, x30 SMSUBL R10, R3, R17, R27 <=> smsubl x27, w17, w10, x3 +(3) FMADDD, FMADDS, FMSUBD, FMSUBS, FNMADDD, FNMADDS, FNMSUBD, FNMSUBS , , , + Examples: FMADDD F30, F20, F3, F29 <=> fmadd d29, d3, d30, d20 FNMSUBS F7, F25, F7, F22 <=> fnmsub s22, s7, s7, s25 diff --git a/src/crypto/sha1/sha1block_arm64.s b/src/crypto/sha1/sha1block_arm64.s index ef94353b44..d56838464d 100644 --- a/src/crypto/sha1/sha1block_arm64.s +++ b/src/crypto/sha1/sha1block_arm64.s @@ -26,7 +26,7 @@ TEXT ·sha1block(SB),NOSPLIT,$0 MOVD k_base+48(FP), R2 // k constants first address MOVD p_len+32(FP), R3 // message length VLD1.P 16(R0), [V0.S4] - VMOVS (R0), V20 + FMOVS (R0), F20 SUB $16, R0, R0 blockloop: @@ -148,5 +148,5 @@ blockloop: sha1ret: VST1.P [V0.S4], 16(R0) // store hash value H(dcba) - VMOVS V20, (R0) // store hash value H(e) + FMOVS F20, (R0) // store hash value H(e) RET