From c79c5e1aa427eceb585f839a81d02c5390457a9c Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Tue, 14 Apr 2020 15:32:29 -0400 Subject: [PATCH] cmd/internal/obj/ppc64: add support for PCALIGN 32 This adds support support for the PCALIGN value 32. When this directive occurs code will be aligned to 32 bytes unless too many NOPs are needed, and then will fall back to 16 byte alignment. On Linux the function's alignment is promoted from 16 to 32 in functions where PCALIGN 32 appears. On AIX the function's alignment is left at 16 due to complexity with modifying its alignment, which means code will be aligned to at least 16, possibly 32 at times, which is still good. Test was updated to accept new value. Change-Id: I28e72d5f30ca472ed9ba736ddeabfea192d11797 Reviewed-on: https://go-review.googlesource.com/c/go/+/228258 Run-TryBot: Lynn Boger TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/internal/obj/ppc64/asm9.go | 31 ++++++++++++++++--- src/cmd/internal/obj/ppc64/asm_test.go | 42 +++++++++++++++++++++----- 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 288e5f3360..92ce4249c9 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -620,19 +620,42 @@ var oprange [ALAST & obj.AMask][]Optab var xcmp [C_NCLASS][C_NCLASS]bool // padding bytes to add to align code as requested -func addpad(pc, a int64, ctxt *obj.Link) int { +func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { + // For 16 and 32 byte alignment, there is a tradeoff + // between aligning the code and adding too many NOPs. switch a { case 8: if pc&7 != 0 { return 4 } case 16: + // Align to 16 bytes if possible but add at + // most 2 NOPs. switch pc & 15 { case 4, 12: return 4 case 8: return 8 } + case 32: + // Align to 32 bytes if possible but add at + // most 3 NOPs. + switch pc & 31 { + case 4, 20: + return 12 + case 8, 24: + return 8 + case 12, 28: + return 4 + } + // When 32 byte alignment is requested on Linux, + // promote the function's alignment to 32. On AIX + // the function alignment is not changed which might + // result in 16 byte alignment but that is still fine. + // TODO: alignment on AIX + if ctxt.Headtype != objabi.Haix && cursym.Func.Align < 32 { + cursym.Func.Align = 32 + } default: ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a) } @@ -663,7 +686,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if m == 0 { if p.As == obj.APCALIGN { a := c.vregoff(&p.From) - m = addpad(pc, a, ctxt) + m = addpad(pc, a, ctxt, cursym) } else { if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA { ctxt.Diag("zero-width instruction\n%v", p) @@ -721,7 +744,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if m == 0 { if p.As == obj.APCALIGN { a := c.vregoff(&p.From) - m = addpad(pc, a, ctxt) + m = addpad(pc, a, ctxt, cursym) } else { if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA { ctxt.Diag("zero-width instruction\n%v", p) @@ -761,7 +784,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if o.type_ == 0 && p.As == obj.APCALIGN { pad := LOP_RRR(OP_OR, REGZERO, REGZERO, REGZERO) aln := c.vregoff(&p.From) - v := addpad(p.Pc, aln, c.ctxt) + v := addpad(p.Pc, aln, c.ctxt, c.cursym) if v > 0 { // Same padding instruction for all for i = 0; i < int32(v/4); i++ { diff --git a/src/cmd/internal/obj/ppc64/asm_test.go b/src/cmd/internal/obj/ppc64/asm_test.go index fff478e552..70dabc2017 100644 --- a/src/cmd/internal/obj/ppc64/asm_test.go +++ b/src/cmd/internal/obj/ppc64/asm_test.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "strings" "testing" ) @@ -17,21 +18,20 @@ import ( var invalidPCAlignSrc = ` TEXT test(SB),0,$0-0 ADD $2, R3 -PCALIGN $32 +PCALIGN $64 RET ` + var validPCAlignSrc = ` TEXT test(SB),0,$0-0 ADD $2, R3 PCALIGN $16 -MOVD $8, R4 -ADD $8, R4 -PCALIGN $16 +MOVD $8, R16 ADD $8, R4 +PCALIGN $32 +ADD $8, R3 PCALIGN $8 -ADD $4, R6 -PCALIGN $16 -ADD R2, R3, R4 +ADD $4, R8 RET ` @@ -39,6 +39,10 @@ RET // PCALIGN directive, to verify correct values are and // accepted, and incorrect values are flagged in error. func TestPCalign(t *testing.T) { + var pattern8 = `0x...8\s.*ADD\s..,\sR8` + var pattern16 = `0x...[80]\s.*MOVD\s..,\sR16` + var pattern32 = `0x...0\s.*ADD\s..,\sR3` + testenv.MustHaveGoBuild(t) dir, err := ioutil.TempDir("", "testpcalign") @@ -63,6 +67,30 @@ func TestPCalign(t *testing.T) { t.Errorf("Build failed: %v, output: %s", err, out) } + matched, err := regexp.MatchString(pattern8, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The 8 byte alignment is not correct: %t, output:%s\n", matched, out) + } + + matched, err = regexp.MatchString(pattern16, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The 16 byte alignment is not correct: %t, output:%s\n", matched, out) + } + + matched, err = regexp.MatchString(pattern32, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The 32 byte alignment is not correct: %t, output:%s\n", matched, out) + } + // generate a test with invalid use of PCALIGN tmpfile = filepath.Join(dir, "xi.s") -- 2.48.1