From c6bf9a81095bdbd3631cedc66ce690fa070d1a38 Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Mon, 17 Sep 2018 02:05:22 +0000
Subject: [PATCH] cmd/compile: optimize AMD64's bitwise operations
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Currently "arr[idx] |= 0x80" is compiled to a MOVLload->BTSL->MOVLstore
sequence; this CL optimizes it to a single BTSLconstmodify. Other
bitwise operations with a direct memory operand are also implemented.

1. The size of the executable bin/go decreases by about 4KB, and the
total size of pkg/linux_amd64 (excluding cmd/compile) decreases by
about 0.6KB.

2. There is a small improvement in the go1 benchmarks (excluding noise).

name old time/op new time/op delta
BinaryTree17-4 2.66s ± 4% 2.66s ± 3% ~ (p=0.596 n=49+49)
Fannkuch11-4 2.38s ± 2% 2.32s ± 2% -2.69% (p=0.000 n=50+50)
FmtFprintfEmpty-4 42.7ns ± 4% 43.2ns ± 7% +1.31% (p=0.009 n=50+50)
FmtFprintfString-4 71.0ns ± 5% 72.0ns ± 3% +1.33% (p=0.000 n=50+50)
FmtFprintfInt-4 80.7ns ± 4% 80.6ns ± 3% ~ (p=0.931 n=50+50)
FmtFprintfIntInt-4 125ns ± 3% 126ns ± 4% ~ (p=0.051 n=50+50)
FmtFprintfPrefixedInt-4 158ns ± 1% 142ns ± 3% -9.84% (p=0.000 n=36+50)
FmtFprintfFloat-4 215ns ± 4% 212ns ± 4% -1.23% (p=0.002 n=50+50)
FmtManyArgs-4 519ns ± 3% 510ns ± 3% -1.77% (p=0.000 n=50+50)
GobDecode-4 6.49ms ± 6% 6.52ms ± 5% ~ (p=0.866 n=50+50)
GobEncode-4 5.93ms ± 8% 6.01ms ± 7% ~ (p=0.076 n=50+50)
Gzip-4 222ms ± 4% 224ms ± 8% +0.80% (p=0.001 n=50+50)
Gunzip-4 36.6ms ± 5% 36.4ms ± 4% ~ (p=0.093 n=50+50)
HTTPClientServer-4 59.1µs ± 1% 58.9µs ± 2% -0.24% (p=0.039 n=49+48)
JSONEncode-4 9.23ms ± 4% 9.21ms ± 5% ~ (p=0.244 n=50+50)
JSONDecode-4 48.8ms ± 4% 48.7ms ± 4% ~ (p=0.653 n=50+50)
Mandelbrot200-4 3.81ms ± 4% 3.80ms ± 3% ~ (p=0.834 n=50+50)
GoParse-4 3.20ms ± 5% 3.19ms ± 5% ~ (p=0.494 n=50+50)
RegexpMatchEasy0_32-4 78.1ns ± 2% 77.4ns ± 3% -0.86% (p=0.005 n=50+50)
RegexpMatchEasy0_1K-4 233ns ± 3% 233ns ± 3% ~ (p=0.074 n=50+50)
RegexpMatchEasy1_32-4 74.2ns ± 3% 73.4ns ± 3% -1.06% (p=0.000 n=50+50)
RegexpMatchEasy1_1K-4 369ns ± 2% 364ns ± 4% -1.41% (p=0.000 n=36+50)
RegexpMatchMedium_32-4 109ns ± 4% 107ns ± 3% -2.06% (p=0.001 n=50+50)
RegexpMatchMedium_1K-4 31.5µs ± 3% 30.8µs ± 3% -2.20% (p=0.000 n=50+50)
RegexpMatchHard_32-4 1.57µs ± 3% 1.56µs ± 2% -0.57% (p=0.016 n=50+50)
RegexpMatchHard_1K-4 47.4µs ± 4% 47.0µs ± 3% -0.82% (p=0.008 n=50+50)
Revcomp-4 414ms ± 7% 412ms ± 7% ~ (p=0.285 n=50+50)
Template-4 64.3ms ± 4% 62.7ms ± 3% -2.44% (p=0.000 n=50+50)
TimeParse-4 316ns ± 3% 313ns ± 3% ~ (p=0.122 n=50+50)
TimeFormat-4 291ns ± 3% 293ns ± 3% +0.80% (p=0.001 n=50+50)
[Geo mean] 46.5µs 46.2µs -0.81%

name old speed new speed delta
GobDecode-4 118MB/s ± 6% 118MB/s ± 5% ~ (p=0.863 n=50+50)
GobEncode-4 130MB/s ± 9% 128MB/s ± 8% ~ (p=0.076 n=50+50)
Gzip-4 87.4MB/s ± 4% 86.8MB/s ± 7% -0.78% (p=0.002 n=50+50)
Gunzip-4 531MB/s ± 5% 533MB/s ± 4% ~ (p=0.093 n=50+50)
JSONEncode-4 210MB/s ± 4% 211MB/s ± 5% ~ (p=0.247 n=50+50)
JSONDecode-4 39.8MB/s ± 4% 39.9MB/s ± 4% ~ (p=0.654 n=50+50)
GoParse-4 18.1MB/s ± 5% 18.2MB/s ± 5% ~ (p=0.493 n=50+50)
RegexpMatchEasy0_32-4 410MB/s ± 2% 413MB/s ± 3% +0.86% (p=0.004 n=50+50)
RegexpMatchEasy0_1K-4 4.39GB/s ± 3% 4.38GB/s ± 3% ~ (p=0.063 n=50+50)
RegexpMatchEasy1_32-4 432MB/s ± 3% 436MB/s ± 3% +1.07% (p=0.000 n=50+50)
RegexpMatchEasy1_1K-4 2.77GB/s ± 2% 2.81GB/s ± 4% +1.46% (p=0.000 n=36+50)
RegexpMatchMedium_32-4 9.16MB/s ± 3% 9.35MB/s ± 4% +2.09% (p=0.001 n=50+50)
RegexpMatchMedium_1K-4 32.5MB/s ± 3% 33.2MB/s ± 3% +2.25% (p=0.000 n=50+50)
RegexpMatchHard_32-4 20.4MB/s ± 3% 20.5MB/s ± 2% +0.56% (p=0.017 n=50+50)
RegexpMatchHard_1K-4 21.6MB/s ± 4% 21.8MB/s ± 3% +0.83% (p=0.008 n=50+50)
Revcomp-4 613MB/s ± 4% 618MB/s ± 7% ~ (p=0.152 n=48+50)
Template-4 30.2MB/s ± 4% 30.9MB/s ± 3% +2.49% (p=0.000 n=50+50)
[Geo mean] 127MB/s 128MB/s +0.64%

Change-Id: If405198283855d75697f66cf894b2bef458f620e
Reviewed-on: https://go-review.googlesource.com/135422
Reviewed-by: Keith Randall
---
 src/cmd/compile/internal/amd64/ssa.go        |    4 +-
 src/cmd/compile/internal/ssa/gen/AMD64.rules |   68 +-
 src/cmd/compile/internal/ssa/gen/AMD64Ops.go |   14 +
 src/cmd/compile/internal/ssa/opGen.go        |  186 ++
 src/cmd/compile/internal/ssa/rewriteAMD64.go | 1793 ++++++++++++++++--
 test/codegen/bits.go                         |    6 +
 6 files changed, 1938 insertions(+), 133 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 818bc35941..b4c4b1f4cd 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -695,6 +695,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   p.To.Type = obj.TYPE_REG
   p.To.Reg = v.Reg()
  case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
+  ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
   ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
   ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
   p := s.Prog(v.Op.Asm())
@@ -763,7 +764,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   }
   fallthrough
  case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
-  ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
+  ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
+  ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
   sc := v.AuxValAndOff()
   off := sc.Off()
   val := sc.Val()
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 803b8896b0..76a4fc9ab7 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -709,7 +709,17 @@
 (ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
 
 (AND(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [c & d] x)
+(BTR(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [d &^ 1<<uint32(c)] x)
+(AND(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [c &^ 1<<uint32(d)] x)
+(BTR(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [^(1<<uint32(c) | 1<<uint32(d))] x)
 (XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ d] x)
+(BTC(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [d ^ 1<<uint32(c)] x)
+(XOR(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ 1<<uint32(d)] x)
+(BTC(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [1<<uint32(c) ^ 1<<uint32(d)] x)
 (OR(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [c | d] x)
+(OR(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [c | 1<<uint32(d)] x)
+(BTS(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [d | 1<<uint32(c)] x)
+(BTS(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [1<<uint32(c) | 1<<uint32(d)] x)
 (MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
 (MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)
@@ -1051,14 +1061,14 @@
  ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
 ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
  ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
-((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
- ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
- ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
- ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
- ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)
 
 // Fold constants into stores.
 (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
@@ -1106,18 +1116,18 @@
 ((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
  ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
  && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
- ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
-((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
  && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
- ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
-((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 
 // generating indexed loads and stores
 (MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
@@ -1424,6 +1434,12 @@
 (XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d])
 (NOTQ (MOVQconst [c])) -> (MOVQconst [^c])
 (NOTL (MOVLconst [c])) -> (MOVLconst [^c])
+(BTSQconst [c] (MOVQconst [d])) -> (MOVQconst [d|(1<<uint32(c))])
+(BTSLconst [c] (MOVLconst [d])) -> (MOVLconst [d|(1<<uint32(c))])
+(BTRQconst [c] (MOVQconst [d])) -> (MOVQconst [d&^(1<<uint32(c))])
+(BTRLconst [c] (MOVLconst [d])) -> (MOVLconst [d&^(1<<uint32(c))])
+(BTCQconst [c] (MOVQconst [d])) -> (MOVQconst [d^(1<<uint32(c))])
+(BTCLconst [c] (MOVLconst [d])) -> (MOVLconst [d^(1<<uint32(c))])
@@ ... @@
 ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) ->
  ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
 ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) ->
  ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) ->
  ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
-(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
- ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
+(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem)
 (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) ->
  ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
-(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
- ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem)
 
 // Merge ADDQconst and LEAQ into atomic loads.
 (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
@@ -2392,12 +2408,12 @@
 (MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
 (MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)
 
-(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
- && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
- ((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
- && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
- ((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+ && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+ && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
 
 // float <-> int register moves, with no conversion.
 // These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
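For illustration, Go code along the following lines should now hit the new
constmodify ops. This is only a minimal sketch in the spirit of the
test/codegen/bits.go additions listed in the diffstat; the function names
and the expected-instruction comments are illustrative, not the exact
checks added by this CL.

    package codegen

    // Each function performs a read-modify-write of one 32-bit word.
    // Previously this compiled to a MOVLload, a BTSL/BTRL/BTCL on a
    // register, and a MOVLstore; with the new ops the three steps can
    // collapse into a single memory-operand instruction such as
    // "BTSL $7, (AX)".

    func setBit7(arr []uint32, idx int) {
        arr[idx] |= 0x80 // BTSLconstmodify: set bit 7 in place
    }

    func clearBit8(arr []uint32, idx int) {
        arr[idx] &^= 0x100 // BTRLconstmodify: clear bit 8 in place
    }

    func flipBit9(arr []uint32, idx int) {
        arr[idx] ^= 0x200 // BTCLconstmodify: complement bit 9 in place
    }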
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 68facebe47..017c07071d 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -289,6 +289,20 @@ func init() {
   {name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
   {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
 
+  // direct bit operation on memory operand
+  {name: "BTCQmodify", argLength: 3, reg: gpstore, asm: "BTCQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
+  {name: "BTCLmodify", argLength: 3, reg: gpstore, asm: "BTCL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
+  {name: "BTSQmodify", argLength: 3, reg: gpstore, asm: "BTSQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
+  {name: "BTSLmodify", argLength: 3, reg: gpstore, asm: "BTSL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
+  {name: "BTRQmodify", argLength: 3, reg: gpstore, asm: "BTRQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
+  {name: "BTRLmodify", argLength: 3, reg: gpstore, asm: "BTRL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
+  {name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+  {name: "BTCLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+  {name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+  {name: "BTSLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+  {name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+  {name: "BTRLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+
   {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
   {name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
   {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 77b9875fd6..fe63633750 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -550,6 +550,18 @@ const (
  OpAMD64BTRQconst
  OpAMD64BTSLconst
  OpAMD64BTSQconst
+ OpAMD64BTCQmodify
+ OpAMD64BTCLmodify
+ OpAMD64BTSQmodify
+ OpAMD64BTSLmodify
+ OpAMD64BTRQmodify
+ OpAMD64BTRLmodify
+ OpAMD64BTCQconstmodify
+ OpAMD64BTCLconstmodify
+ OpAMD64BTSQconstmodify
+ OpAMD64BTSLconstmodify
+ OpAMD64BTRQconstmodify
+ OpAMD64BTRLconstmodify
 OpAMD64TESTQ
 OpAMD64TESTL
 OpAMD64TESTW
@@ -6901,6 +6913,180 @@ var opcodeTable = [...]opInfo{
    },
   },
  },
+ {
+  name:           "BTCQmodify",
+  auxType:        auxSymOff,
+  argLen:         3,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTCQ,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTCLmodify",
+  auxType:        auxSymOff,
+  argLen:         3,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTCL,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTSQmodify",
+  auxType:        auxSymOff,
+  argLen:         3,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTSQ,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTSLmodify",
+  auxType:        auxSymOff,
+  argLen:         3,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTSL,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTRQmodify",
+  auxType:        auxSymOff,
+  argLen:         3,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTRQ,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTRLmodify",
+  auxType:        auxSymOff,
+  argLen:         3,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTRL,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTCQconstmodify",
+  auxType:        auxSymValAndOff,
+  argLen:         2,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTCQ,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTCLconstmodify",
+  auxType:        auxSymValAndOff,
+  argLen:         2,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTCL,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTSQconstmodify",
+  auxType:        auxSymValAndOff,
+  argLen:         2,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTSQ,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTSLconstmodify",
+  auxType:        auxSymValAndOff,
+  argLen:         2,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTSL,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTRQconstmodify",
+  auxType:        auxSymValAndOff,
+  argLen:         2,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTRQ,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
+ {
+  name:           "BTRLconstmodify",
+  auxType:        auxSymValAndOff,
+  argLen:         2,
+  clobberFlags:   true,
+  faultOnNilArg0: true,
+  symEffect:      SymRead | SymWrite,
+  asm:            x86.ABTRL,
+  reg: regInfo{
+   inputs: []inputInfo{
+    {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+   },
+  },
+ },
 {
  name:   "TESTQ",
  argLen: 2,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 98b36a96a0..cd82a5642c 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -65,18 +65,46 @@ func rewriteValueAMD64(v *Value) bool {
   return rewriteValueAMD64_OpAMD64ANDQmodify_0(v)
  case OpAMD64BSFQ:
   return rewriteValueAMD64_OpAMD64BSFQ_0(v)
+ case OpAMD64BTCLconst:
+  return rewriteValueAMD64_OpAMD64BTCLconst_0(v)
+ case OpAMD64BTCLconstmodify:
+  return rewriteValueAMD64_OpAMD64BTCLconstmodify_0(v)
+ case OpAMD64BTCLmodify:
+  return rewriteValueAMD64_OpAMD64BTCLmodify_0(v)
+ case OpAMD64BTCQconst:
+  return rewriteValueAMD64_OpAMD64BTCQconst_0(v)
+ case OpAMD64BTCQconstmodify:
+  return rewriteValueAMD64_OpAMD64BTCQconstmodify_0(v)
+ case OpAMD64BTCQmodify:
+  return rewriteValueAMD64_OpAMD64BTCQmodify_0(v)
 case OpAMD64BTLconst:
  return rewriteValueAMD64_OpAMD64BTLconst_0(v)
 case OpAMD64BTQconst:
  return rewriteValueAMD64_OpAMD64BTQconst_0(v)
 case OpAMD64BTRLconst:
  return rewriteValueAMD64_OpAMD64BTRLconst_0(v)
+ case OpAMD64BTRLconstmodify:
+  return rewriteValueAMD64_OpAMD64BTRLconstmodify_0(v)
+ case OpAMD64BTRLmodify:
+  return rewriteValueAMD64_OpAMD64BTRLmodify_0(v)
 case OpAMD64BTRQconst:
  return rewriteValueAMD64_OpAMD64BTRQconst_0(v)
+ case OpAMD64BTRQconstmodify:
+  return rewriteValueAMD64_OpAMD64BTRQconstmodify_0(v)
+ case OpAMD64BTRQmodify:
+  return rewriteValueAMD64_OpAMD64BTRQmodify_0(v)
 case OpAMD64BTSLconst:
  return rewriteValueAMD64_OpAMD64BTSLconst_0(v)
+ case OpAMD64BTSLconstmodify:
+  return rewriteValueAMD64_OpAMD64BTSLconstmodify_0(v)
+ case OpAMD64BTSLmodify:
+  return rewriteValueAMD64_OpAMD64BTSLmodify_0(v)
 case OpAMD64BTSQconst:
  return rewriteValueAMD64_OpAMD64BTSQconst_0(v)
+ case OpAMD64BTSQconstmodify:
+  return rewriteValueAMD64_OpAMD64BTSQconstmodify_0(v)
+ case OpAMD64BTSQmodify:
+  return rewriteValueAMD64_OpAMD64BTSQmodify_0(v)
 case OpAMD64CMOVLCC:
  return rewriteValueAMD64_OpAMD64CMOVLCC_0(v)
 case OpAMD64CMOVLCS:
@@ -278,7 +306,7 @@
 case OpAMD64MOVQloadidx8:
  return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v)
 case OpAMD64MOVQstore:
- return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) || rewriteValueAMD64_OpAMD64MOVQstore_20(v)
+ return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) || rewriteValueAMD64_OpAMD64MOVQstore_20(v) || rewriteValueAMD64_OpAMD64MOVQstore_30(v)
 case OpAMD64MOVQstoreconst:
  return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v)
 case OpAMD64MOVQstoreconstidx1:
@@ -3590,6 +3618,22 @@ func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool {
   v.AddArg(x)
   return true
  }
+ // match: (ANDLconst [c] (BTRLconst [d] x))
+ // cond:
+ // result: (ANDLconst [c &^ 1<<uint32(d)] x)
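The register-operand forms (BTSQmodify, BTSLmodify, and friends) matched by
the MOVQstore/MOVLstore rules above cover bit indexes that are only known at
run time. Continuing the sketch above, the kind of Go code they target looks
roughly like this; whether a given pattern is actually rewritten depends on
the BT* rules already present in AMD64.rules, so treat it as illustrative:

    // The single-bit mask 1 << (bit & 31) is built with a shift, the
    // OR/XOR against it can become BTSL/BTCL on a register, and the new
    // MOVLstore rules may then fold the surrounding load and store into
    // one BTSLmodify/BTCLmodify instruction with a memory operand.

    func setRuntimeBit(arr []uint32, idx int, bit uint) {
        arr[idx] |= 1 << (bit & 31) // candidate for BTSLmodify
    }

    func flipRuntimeBit(arr []uint32, idx int, bit uint) {
        arr[idx] ^= 1 << (bit & 31) // candidate for BTCLmodify
    }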