From: Meng Zhuo Date: Fri, 12 Jun 2020 16:06:49 +0000 (+0800) Subject: cmd/asm: Add SHA3 hardware instructions for ARM64 X-Git-Tag: go1.16beta1~792 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=3036b76df0ae748856e3e0008b67241cc580e263;p=gostls13.git cmd/asm: Add SHA3 hardware instructions for ARM64 Armv8.2-SHA introduced four SHA3-related instructions EOR3 .16B, .16B, .16B, .16B RAX1 .2D, .2D, .2D XAR .2D, .2D, .2D, # BCAX .16B, .16B, .16B, .16B We convert them into Go asm style as: VEOR3 .B16, .B16, .B16, .B16 VRAX1 .D2, .D2, .D2 VXAR $imm6, .D2, .D2, .D2 VBCAX .B16, .B16, .B16, .B16 Armv8 Reference Manual: * EOR3 (Three-way Exclusive OR) on C7.2.42 * RAX1 (Rotate and Exclusive OR) on C7.2.217 * XAR (Exclusive OR and Rotate) on C7.2.401 * BCAX (Bit Clear and Exclusive OR) on C7.2.12 Change-Id: I9a5d1b5ad508ed8fd5289d535906c54d9a63ca5a Reviewed-on: https://go-review.googlesource.com/c/go/+/180757 Run-TryBot: Meng Zhuo TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: Emmanuel Odeke --- diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index e277c04b7c..7f495b90bb 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -81,6 +81,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 SHA512H2 V4.D2, V3, V2 // 628464ce SHA512SU0 V9.D2, V8.D2 // 2881c0ce SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce + VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace + VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce VADDV V0.S4, V0 // 00b8b14e VMOVI $82, V0.B16 // 40e6024f VUADDLV V6.B16, V6 // c638306e @@ -139,6 +141,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VTBL V14.B16, [V3.B16, V4.B16, V5.B16], V17.B16 // 71400e4e VTBL V13.B16, [V29.B16, V30.B16, V31.B16, V0.B16], V28.B16 // bc630d4e VTBL V3.B8, [V27.B16], V8.B8 // 6803030e + VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce + VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce VZIP1 V16.H8, V3.H8, V19.H8 // 7338504e VZIP2 V22.D2, V25.D2, V21.D2 // 357bd64e VZIP1 V6.D2, V9.D2, V11.D2 // 2b39c64e diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 1ca41c15ba..33319e48df 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -958,9 +958,11 @@ const ( AVADDP AVAND AVBIF + AVBCAX AVCMEQ AVCNT AVEOR + AVEOR3 AVMOV AVLD1 AVLD2 @@ -989,6 +991,7 @@ const ( AVPMULL2 AVEXT AVRBIT + AVRAX1 AVUSHR AVUSHLL AVUSHLL2 @@ -1001,6 +1004,7 @@ const ( AVBSL AVBIT AVTBL + AVXAR AVZIP1 AVZIP2 AVCMTST diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 900cdba817..e5534e26b9 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -464,9 +464,11 @@ var Anames = []string{ "VADDP", "VAND", "VBIF", + "VBCAX", "VCMEQ", "VCNT", "VEOR", + "VEOR3", "VMOV", "VLD1", "VLD2", @@ -495,6 +497,7 @@ var Anames = []string{ "VPMULL2", "VEXT", "VRBIT", + "VRAX1", "VUSHR", "VUSHLL", "VUSHLL2", @@ -507,6 +510,7 @@ var Anames = []string{ "VBSL", "VBIT", "VTBL", + "VXAR", "VZIP1", "VZIP2", "VCMTST", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 7c35fce106..c46066313e 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -843,6 +843,8 @@ var optab = []Optab{ {ASHA256H, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, {AVREV32, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, {AVPMULL, C_ARNG, C_ARNG, C_NONE, C_ARNG, 93, 4, 0, 0, 0}, + {AVEOR3, C_ARNG, C_ARNG, C_ARNG, C_ARNG, 103, 4, 0, 0, 0}, + {AVXAR, C_VCON, C_ARNG, C_ARNG, C_ARNG, 104, 4, 0, 0, 0}, {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, {obj.APCDATA, C_VCON, C_NONE, C_NONE, C_VCON, 0, 0, 0, 0, 0}, @@ -2769,6 +2771,7 @@ func buildop(ctxt *obj.Link) { case AVADD: oprangeset(AVSUB, t) + oprangeset(AVRAX1, t) case AAESD: oprangeset(AAESE, t) @@ -2827,6 +2830,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVLD4, t) oprangeset(AVLD4R, t) + case AVEOR3: + oprangeset(AVBCAX, t) + case ASHA1H, AVCNT, AVMOV, @@ -2839,7 +2845,8 @@ func buildop(ctxt *obj.Link) { AVDUP, AVMOVI, APRFM, - AVEXT: + AVEXT, + AVXAR: break case obj.ANOP, @@ -4205,7 +4212,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm., Vn., Vd. */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2/vrax1 Vm., Vn., Vd. */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4269,6 +4276,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } else { size = 0 } + case AVRAX1: + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement: %v", p) + } + size = 0 + Q = 0 } o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) @@ -5186,6 +5199,51 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("shift amount out of range: %v\n", p) } o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) + case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */ + ta := (p.From.Reg >> 5) & 15 + tm := (p.Reg >> 5) & 15 + td := (p.To.Reg >> 5) & 15 + tn := ((p.GetFrom3().Reg) >> 5) & 15 + + if ta != tm || ta != tn || ta != td || ta != ARNG_16B { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + o1 = c.oprrr(p, p.As) + ra := int(p.From.Reg) + rm := int(p.Reg) + rn := int(p.GetFrom3().Reg) + rd := int(p.To.Reg) + o1 |= uint32(rm&31)<<16 | uint32(ra&31)<<10 | uint32(rn&31)<<5 | uint32(rd)&31 + + case 104: /* vxar $imm4, Vm., Vn., Vd. */ + af := ((p.GetFrom3().Reg) >> 5) & 15 + at := (p.To.Reg >> 5) & 15 + a := (p.Reg >> 5) & 15 + index := int(p.From.Offset) + + if af != a || af != at { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement, should be D2: %v", p) + break + } + + if index < 0 || index > 63 { + c.ctxt.Diag("illegal offset: %v", p) + } + + o1 = c.opirr(p, p.As) + rf := (p.GetFrom3().Reg) & 31 + rt := (p.To.Reg) & 31 + r := (p.Reg) & 31 + + o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + } out[0] = o1 out[1] = o2 @@ -5761,6 +5819,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVAND: return 7<<25 | 1<<21 | 7<<10 + case AVBCAX: + return 0xCE<<24 | 1<<21 + case AVCMEQ: return 1<<29 | 0x71<<21 | 0x23<<10 @@ -5776,12 +5837,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVEOR: return 1<<29 | 0x71<<21 | 7<<10 + case AVEOR3: + return 0xCE << 24 + case AVORR: return 7<<25 | 5<<21 | 7<<10 case AVREV16: return 3<<26 | 2<<24 | 1<<21 | 3<<11 + case AVRAX1: + return 0xCE<<24 | 3<<21 | 1<<15 | 3<<10 + case AVREV32: return 11<<26 | 2<<24 | 1<<21 | 1<<11 @@ -6039,6 +6106,8 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVUSHLL2, AVUXTL2: return 3<<29 | 15<<24 | 0x29<<10 + case AVXAR: + return 0xCE<<24 | 1<<23 } c.ctxt.Diag("%v: bad irr %v", p, a)