From 8eec2bbfbc4f209950f677906c6ce67e01d32930 Mon Sep 17 00:00:00 2001
From: David Chase
Date: Fri, 11 Mar 2016 00:10:52 -0500
Subject: [PATCH] cmd/compile: added some intrinsics to SSA back end

One intrinsic was needed to help get the very best performance
out of a future GC; while that one was being added, I also added
Bswap, since that is sometimes a handy thing to have. I had
intended to fill out the bit-scan intrinsic family as well, but the
mismatch between the "bit scan reverse" instruction and "count
leading zeroes" was large enough to cause me to leave it out -- it
poses a dilemma that I'd rather dodge right now.

These intrinsics are not exposed for general use. That's a separate
issue, requiring an API proposal ( https://github.com/golang/proposal ).

All the intrinsics are tested: both that they are substituted on the
appropriate architecture, and that they produce the expected results.

Change-Id: I5848037cfd97de4f75bdc33bdd89bba00af4a8ee
Reviewed-on: https://go-review.googlesource.com/20564
Reviewed-by: Keith Randall
Run-TryBot: David Chase
TryBot-Result: Gobot Gobot
---
 src/cmd/compile/internal/amd64/prog.go        |  58 ++-
 src/cmd/compile/internal/amd64/ssa.go         |  36 +-
 src/cmd/compile/internal/gc/go.go             |   7 +-
 src/cmd/compile/internal/gc/inl.go            |   2 +-
 src/cmd/compile/internal/gc/ssa.go            |  77 +++-
 src/cmd/compile/internal/ssa/compile.go       |  18 +
 src/cmd/compile/internal/ssa/gen/AMD64.rules  |  32 ++
 src/cmd/compile/internal/ssa/gen/AMD64Ops.go  |  29 +-
 .../compile/internal/ssa/gen/genericOps.go    |  11 +
 src/cmd/compile/internal/ssa/opGen.go         | 278 +++++++++++++
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 383 ++++++++++++++++++
 src/runtime/internal/sys/intrinsics.go        | 105 +++++
 test/intrinsic.dir/main.go                    | 109 +++++
 test/intrinsic.go                             |   8 +
 test/run.go                                   |  26 +-
 15 files changed, 1143 insertions(+), 36 deletions(-)
 create mode 100644 src/runtime/internal/sys/intrinsics.go
 create mode 100644 test/intrinsic.dir/main.go
 create mode 100644 test/intrinsic.go

diff --git a/src/cmd/compile/internal/amd64/prog.go b/src/cmd/compile/internal/amd64/prog.go
index 55ea7ee82a..91b479be22 100644
--- a/src/cmd/compile/internal/amd64/prog.go
+++ b/src/cmd/compile/internal/amd64/prog.go
@@ -36,26 +36,44 @@ var progtable = [x86.ALAST & obj.AMask]obj.ProgInfo{
 	// NOP is an internal no-op that also stands
 	// for USED and SET annotations, not the Intel opcode.
- obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite}, - x86.AADCL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, - x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, - x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, - x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr}, - x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr}, - x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry}, - obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry}, - x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, - x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, - x86.ACWD & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, - x86.ACLD & obj.AMask: {Flags: gc.OK}, - x86.ASTD & obj.AMask: {Flags: gc.OK}, + obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite}, + x86.AADCL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, + x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, + x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, + x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr}, + x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr}, + x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry}, + + x86.ABSFL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSFQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSFW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSRL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSRQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSRW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSWAPL & obj.AMask: {Flags: gc.SizeL | RightRdwr}, + x86.ABSWAPQ & obj.AMask: {Flags: gc.SizeQ | RightRdwr}, + + obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry}, + x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, + x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, + x86.ACWD & 
obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, + x86.ACLD & obj.AMask: {Flags: gc.OK}, + x86.ASTD & obj.AMask: {Flags: gc.OK}, + + x86.ACMOVLEQ & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVLNE & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVQEQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVQNE & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVWEQ & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVWNE & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMPB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | gc.RightRead | gc.SetCarry}, x86.ACMPL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightRead | gc.SetCarry}, x86.ACMPQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead | gc.SetCarry}, diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 307ba28e5e..dfacff6f40 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -477,6 +477,33 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r + + case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst, + ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst: + r := gc.SSARegNum(v) + x := gc.SSARegNum(v.Args[0]) + // Arg0 is in/out, move in to out if not already same + if r != x { + p := gc.Prog(moveByType(v.Type)) + p.From.Type = obj.TYPE_REG + p.From.Reg = x + p.To.Type = obj.TYPE_REG + p.To.Reg = r + } + + // Constant into AX, after arg0 movement in case arg0 is in AX + p := gc.Prog(moveByType(v.Type)) + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt2Int64() + p.To.Type = obj.TYPE_REG + p.To.Reg = x86.REG_AX + + p = gc.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = x86.REG_AX + p.To.Type = obj.TYPE_REG + p.To.Reg = r + case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) @@ -955,6 +982,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { gc.Maxarg = v.AuxInt } case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB, + ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL, ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB: x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) @@ -968,7 +996,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = r - case ssa.OpAMD64SQRTSD: + case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW, + ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW, + ssa.OpAMD64SQRTSD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) @@ -1008,9 +1038,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX) case ssa.OpAMD64InvertFlags: - v.Fatalf("InvertFlags should never make it to codegen %v", v) + v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT: - v.Fatalf("Flag* ops should never make it to codegen %v", v) + v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpAMD64REPSTOSQ: gc.Prog(x86.AREP) gc.Prog(x86.ASTOSQ) diff --git a/src/cmd/compile/internal/gc/go.go 
b/src/cmd/compile/internal/gc/go.go
index 78c177e616..448a0fd322 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -55,8 +55,8 @@ func (v Val) Ctype() Ctype {
 }
 
 type Pkg struct {
-	Name     string // package name
-	Path     string // string literal used in import statement
+	Name     string // package name, e.g. "sys"
+	Path     string // string literal used in import statement, e.g. "runtime/internal/sys"
 	Pathsym  *Sym
 	Prefix   string // escaped path for use in symbol table
 	Imported bool   // export data of this package was parsed
@@ -469,6 +469,9 @@ const (
 	// Set, use, or kill of carry bit.
 	// Kill means we never look at the carry bit after this kind of instruction.
+	// Originally for understanding ADC, RCR, and so on, but this now also
+	// tracks set, use, and kill of the zero and overflow bits.
+	// TODO: rename to {Set,Use,Kill}Flags.
 	SetCarry  = 1 << 24
 	UseCarry  = 1 << 25
 	KillCarry = 1 << 26
diff --git a/src/cmd/compile/internal/gc/inl.go b/src/cmd/compile/internal/gc/inl.go
index ff0791c537..e25ce132da 100644
--- a/src/cmd/compile/internal/gc/inl.go
+++ b/src/cmd/compile/internal/gc/inl.go
@@ -453,7 +453,7 @@ func inlnode(n *Node) *Node {
 		if Debug['m'] > 3 {
 			fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign))
 		}
-		if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 { // normal case
+		if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 && !isIntrinsicCall1(n) { // normal case
 			n = mkinlcall(n, n.Left, n.Isddd)
 		} else if n.Left.Op == ONAME && n.Left.Left != nil && n.Left.Left.Op == OTYPE && n.Left.Right != nil && n.Left.Right.Op == ONAME { // methods called as functions
 			if n.Left.Sym.Def != nil {
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 93b820b17e..9b8ef20fed 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -2052,7 +2052,13 @@ func (s *state) expr(n *Node) *ssa.Value {
 		p, l, c := s.slice(n.Left.Type, v, i, j, k)
 		return s.newValue3(ssa.OpSliceMake, n.Type, p, l, c)
 
-	case OCALLFUNC, OCALLINTER, OCALLMETH:
+	case OCALLFUNC:
+		if isIntrinsicCall1(n) {
+			return s.intrinsicCall1(n)
+		}
+		fallthrough
+
+	case OCALLINTER, OCALLMETH:
 		a := s.call(n, callNormal)
 		return s.newValue2(ssa.OpLoad, n.Type, a, s.mem())
 
@@ -2373,6 +2379,75 @@ const (
 	callGo
 )
 
+// isSSAIntrinsic1 reports whether s is the symbol of a recognized 1-arg
+// intrinsic that can be handled by the SSA backend.
+// SSA uses this, but so does the front end, to decide whether it should
+// refrain from inlining a function because its calls are candidates for
+// intrinsic substitution.
+func isSSAIntrinsic1(s *Sym) bool {
+	// The test below is not quite accurate: if SSA is disabled for a
+	// function on a per-function basis, for example by a hash-keyed
+	// binary search for failures, that would not be noted here, so the
+	// call would be neither intrinsified nor inlined. (In practice,
+	// inlining has so far been observed only for Bswap32 and the 16-bit
+	// count-trailing-zeroes intrinsic, but the heuristics might change
+	// in the future or on different architectures.)
+	if !ssaEnabled || ssa.IntrinsicsDisable || Thearch.Thechar != '6' {
+		return false
+	}
+	if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/sys" {
+		switch s.Name {
+		case
+			"Ctz64", "Ctz32", "Ctz16",
+			"Bswap64", "Bswap32":
+			return true
+		}
+	}
+	return false
+}
+
+func isIntrinsicCall1(n *Node) bool {
+	if n == nil || n.Left == nil {
+		return false
+	}
+	return isSSAIntrinsic1(n.Left.Sym)
+}
+
+// intrinsicFirstArg extracts the first argument from n.List
+// and evaluates it.
+func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
+	x := n.List.First()
+	if x.Op == OAS {
+		x = x.Right
+	}
+	return s.expr(x)
+}
+
+// intrinsicCall1 converts a call to a recognized 1-arg intrinsic
+// into the corresponding SSA operation.
+func (s *state) intrinsicCall1(n *Node) *ssa.Value {
+	var result *ssa.Value
+	switch n.Left.Sym.Name {
+	case "Ctz64":
+		result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
+	case "Ctz32":
+		result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
+	case "Ctz16":
+		result = s.newValue1(ssa.OpCtz16, Types[TUINT16], s.intrinsicFirstArg(n))
+	case "Bswap64":
+		result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
+	case "Bswap32":
+		result = s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
+	}
+	if result == nil {
+		Fatalf("Unknown special call: %v", n.Left.Sym)
+	}
+	if ssa.IntrinsicsDebug > 0 {
+		Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString())
+	}
+	return result
+}
+
 // Calls the function n using the specified call type.
 // Returns the address of the return value (or nil if none).
 func (s *state) call(n *Node, k callKind) *ssa.Value {
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index b8e2b42c3e..d6c2bf83ef 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -120,6 +120,10 @@ type pass struct {
 // Run consistency checker between each phase
 var checkEnabled = false
 
+// Debug output level and disable switch for intrinsic substitution.
+var IntrinsicsDebug int
+var IntrinsicsDisable bool
+
 // PhaseOption sets the specified flag in the specified ssa phase,
 // returning empty string if this was successful or a string explaining
 // the error if it was not.
@@ -157,6 +161,20 @@ func PhaseOption(phase, flag string, val int) string {
 		}
 	}
 
+	if phase == "intrinsics" {
+		switch flag {
+		case "on":
+			IntrinsicsDisable = val == 0
+		case "off":
+			IntrinsicsDisable = val != 0
+		case "debug":
+			IntrinsicsDebug = val
+		default:
+			return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
+		}
+		return ""
+	}
+
 	underphase := strings.Replace(phase, "_", " ", -1)
 	var re *regexp.Regexp
 	if phase[0] == '~' {
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index b595912cc6..cc210978ef 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -92,6 +92,38 @@
 (Com16 x) -> (NOTW x)
 (Com8 x) -> (NOTB x)
 
+// The CMPQconst 0 below is redundant, because BSF already sets Z,
+// but it is not yet clear how to remove it.
+(Ctz64 x) -> (CMOVQEQconst (BSFQ x) (CMPQconst x [0]) [64])
+(Ctz32 x) -> (CMOVLEQconst (BSFL x) (CMPLconst x [0]) [32])
+(Ctz16 x) -> (CMOVWEQconst (BSFW x) (CMPWconst x [0]) [16])
+
+(CMOVQEQconst x (InvertFlags y) [c]) -> (CMOVQNEconst x y [c])
+(CMOVLEQconst x (InvertFlags y) [c]) -> (CMOVLNEconst x y [c])
+(CMOVWEQconst x (InvertFlags y) [c]) -> (CMOVWNEconst x y [c])
+
+(CMOVQEQconst _ (FlagEQ) [c]) -> (Const64 [c])
+(CMOVLEQconst _ (FlagEQ) [c]) -> (Const32 [c])
+(CMOVWEQconst _ (FlagEQ) [c]) -> (Const16 [c])
+
+(CMOVQEQconst x (FlagLT_ULT)) -> x
+(CMOVLEQconst x (FlagLT_ULT)) -> x
+(CMOVWEQconst x (FlagLT_ULT)) -> x
+
+(CMOVQEQconst x (FlagLT_UGT)) -> x
+(CMOVLEQconst x (FlagLT_UGT)) -> x
+(CMOVWEQconst x (FlagLT_UGT)) -> x
+
+(CMOVQEQconst x (FlagGT_ULT)) -> x
+(CMOVLEQconst x (FlagGT_ULT)) -> x
+(CMOVWEQconst x (FlagGT_ULT)) -> x
+
+(CMOVQEQconst x (FlagGT_UGT)) -> x
+(CMOVLEQconst x (FlagGT_UGT)) -> x
+(CMOVWEQconst x (FlagGT_UGT)) -> x
+
+(Bswap64 x) -> (BSWAPQ x)
+(Bswap32 x) -> (BSWAPL x)
+
 (Sqrt x) -> (SQRTSD x)
 
 // Note: we always extend to 64 bits even though some ops don't need that many result bits.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 116e3ff9e3..9dc09aab53 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -103,9 +103,13 @@ func init() {
 		gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax | flags}
 
-		gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
-		gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
-		flagsgp  = regInfo{inputs: flagsonly, outputs: gponly}
+		gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
+		gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
+		flagsgp  = regInfo{inputs: flagsonly, outputs: gponly}
+
+		// for CMOVconst -- uses AX to hold the constant temporarily; an input
+		// in AX is moved to the output before the temporary is written.
+		gp1flagsgp = regInfo{inputs: []regMask{gp, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
+
 		readflags = regInfo{inputs: flagsonly, outputs: gponly}
 		flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
@@ -307,6 +311,25 @@ func init() {
 		{name: "NOTW", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
 		{name: "NOTB", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
 
+		{name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ"}, // # of low-order zeroes in arg0 (index of lowest set bit); undefined if arg0 is zero
+		{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL"}, // # of low-order zeroes in arg0 (index of lowest set bit); undefined if arg0 is zero
+		{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW"}, // # of low-order zeroes in arg0 (index of lowest set bit); undefined if arg0 is zero
+
+		{name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ"}, // index of arg0's highest set bit (== 63 - # of high-order zeroes); undefined if arg0 is zero
+		{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL"}, // index of arg0's highest set bit (== 31 - # of high-order zeroes); undefined if arg0 is zero
+		{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW"}, // index of arg0's highest set bit (== 15 - # of high-order zeroes); undefined if arg0 is zero
+
+		// Note: the assembly for these ops moves the whole register (the W variants use the L opcode).
+		{name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z set
+		{name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z set
+		{name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z set
+		{name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+		{name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+		{name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+
+		{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true}, // arg0 with bytes swapped
+		{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // arg0 with bytes swapped
+
 		{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
 
 		{name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear.
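An aside on the lowering strategy encoded in the rules and ops above: BSF leaves its destination undefined when the source is zero, so (Ctz64 x) is expressed as a BSFQ whose result is patched by CMOVQEQconst, keyed off CMPQconst x [0], to yield 64 for a zero input. Below is a minimal Go model of the resulting semantics -- an illustrative sketch only, not compiler code, and the name ctz64Model is invented here:

	package main

	import "fmt"

	// ctz64Model mirrors (Ctz64 x) -> (CMOVQEQconst (BSFQ x) (CMPQconst x [0]) [64]).
	// BSF's destination is undefined for a zero source, so the compare+CMOV
	// pair substitutes the constant 64 when the Z flag says x was zero.
	func ctz64Model(x uint64) uint64 {
		if x == 0 { // CMPQconst x [0] sets Z; CMOVQEQconst then picks the constant
			return 64
		}
		n := uint64(0)
		for x&1 == 0 { // stands in for the BSFQ result
			n++
			x >>= 1
		}
		return n
	}

	func main() {
		fmt.Println(ctz64Model(0), ctz64Model(1), ctz64Model(8)) // prints: 64 0 3
	}

The same shape, with 32 and 16 in place of 64, covers Ctz32 and Ctz16.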
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index ab5e335765..6d92926e3a 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -237,6 +237,17 @@ var genericOps = []opData{ {name: "Com32", argLength: 1}, {name: "Com64", argLength: 1}, + {name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16) + {name: "Ctz32", argLength: 1}, // Count trailing zeroes (returns 0-32) + {name: "Ctz64", argLength: 1}, // Count trailing zeroes (returns 0-64) + + {name: "Clz16", argLength: 1}, // Count leading (high order) zeroes (returns 0-16) + {name: "Clz32", argLength: 1}, // Count leading zeroes (returns 0-32) + {name: "Clz64", argLength: 1}, // Count leading zeroes (returns 0-64) + + {name: "Bswap32", argLength: 1}, // Swap bytes + {name: "Bswap64", argLength: 1}, // Swap bytes + {name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only // Data movement, max argument length for Phi is indefinite so just pick diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 3ff2b5ac60..e76efd40ca 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -237,6 +237,20 @@ const ( OpAMD64NOTL OpAMD64NOTW OpAMD64NOTB + OpAMD64BSFQ + OpAMD64BSFL + OpAMD64BSFW + OpAMD64BSRQ + OpAMD64BSRL + OpAMD64BSRW + OpAMD64CMOVQEQconst + OpAMD64CMOVLEQconst + OpAMD64CMOVWEQconst + OpAMD64CMOVQNEconst + OpAMD64CMOVLNEconst + OpAMD64CMOVWNEconst + OpAMD64BSWAPQ + OpAMD64BSWAPL OpAMD64SQRTSD OpAMD64SBBQcarrymask OpAMD64SBBLcarrymask @@ -521,6 +535,14 @@ const ( OpCom16 OpCom32 OpCom64 + OpCtz16 + OpCtz32 + OpCtz64 + OpClz16 + OpClz32 + OpClz64 + OpBswap32 + OpBswap64 OpSqrt OpPhi OpCopy @@ -2803,6 +2825,222 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "BSFQ", + argLen: 1, + asm: x86.ABSFQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSFL", + argLen: 1, + asm: x86.ABSFL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSFW", + argLen: 1, + asm: x86.ABSFW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSRQ", + argLen: 1, + asm: x86.ABSRQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSRL", + argLen: 1, + asm: x86.ABSRL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSRW", + argLen: 1, + asm: x86.ABSRW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS 
+ outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVQEQconst", + auxType: auxInt64, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVQEQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVLEQconst", + auxType: auxInt32, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVLEQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVWEQconst", + auxType: auxInt16, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVLEQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVQNEconst", + auxType: auxInt64, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVQNE, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVLNEconst", + auxType: auxInt32, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVLNE, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVWNEconst", + auxType: auxInt16, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVLNE, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSWAPQ", + argLen: 1, + resultInArg0: true, + asm: x86.ABSWAPQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSWAPL", + argLen: 1, + resultInArg0: true, + asm: x86.ABSWAPL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, { name: "SQRTSD", argLen: 1, @@ -4981,6 +5219,46 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "Ctz16", + argLen: 1, + generic: true, + }, + { + name: "Ctz32", + argLen: 1, + generic: true, + }, + { + name: "Ctz64", + argLen: 1, + generic: true, + }, + { + name: "Clz16", + argLen: 1, + generic: true, + }, + { + name: "Clz32", + argLen: 1, + generic: true, + }, + { + name: "Clz64", + argLen: 1, + generic: true, + }, + { + name: "Bswap32", + argLen: 1, + generic: true, + }, + { + name: "Bswap64", + 
argLen: 1, + generic: true, + }, { name: "Sqrt", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 046973859a..8dd1b15f13 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -66,6 +66,16 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAnd8(v, config) case OpAvg64u: return rewriteValueAMD64_OpAvg64u(v, config) + case OpBswap32: + return rewriteValueAMD64_OpBswap32(v, config) + case OpBswap64: + return rewriteValueAMD64_OpBswap64(v, config) + case OpAMD64CMOVLEQconst: + return rewriteValueAMD64_OpAMD64CMOVLEQconst(v, config) + case OpAMD64CMOVQEQconst: + return rewriteValueAMD64_OpAMD64CMOVQEQconst(v, config) + case OpAMD64CMOVWEQconst: + return rewriteValueAMD64_OpAMD64CMOVWEQconst(v, config) case OpAMD64CMPB: return rewriteValueAMD64_OpAMD64CMPB(v, config) case OpAMD64CMPBconst: @@ -110,6 +120,12 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpConstNil(v, config) case OpConvert: return rewriteValueAMD64_OpConvert(v, config) + case OpCtz16: + return rewriteValueAMD64_OpCtz16(v, config) + case OpCtz32: + return rewriteValueAMD64_OpCtz32(v, config) + case OpCtz64: + return rewriteValueAMD64_OpCtz64(v, config) case OpCvt32Fto32: return rewriteValueAMD64_OpCvt32Fto32(v, config) case OpCvt32Fto64: @@ -2119,6 +2135,307 @@ func rewriteValueAMD64_OpAvg64u(v *Value, config *Config) bool { } return false } +func rewriteValueAMD64_OpBswap32(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Bswap32 x) + // cond: + // result: (BSWAPL x) + for { + x := v.Args[0] + v.reset(OpAMD64BSWAPL) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpBswap64(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Bswap64 x) + // cond: + // result: (BSWAPQ x) + for { + x := v.Args[0] + v.reset(OpAMD64BSWAPQ) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64CMOVLEQconst(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (CMOVLEQconst x (InvertFlags y) [c]) + // cond: + // result: (CMOVLNEconst x y [c]) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64InvertFlags { + break + } + y := v_1.Args[0] + c := v.AuxInt + v.reset(OpAMD64CMOVLNEconst) + v.AddArg(x) + v.AddArg(y) + v.AuxInt = c + return true + } + // match: (CMOVLEQconst _ (FlagEQ) [c]) + // cond: + // result: (Const32 [c]) + for { + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagEQ { + break + } + c := v.AuxInt + v.reset(OpConst32) + v.AuxInt = c + return true + } + // match: (CMOVLEQconst x (FlagLT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVLEQconst x (FlagLT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVLEQconst x (FlagGT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVLEQconst x (FlagGT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + 
return true + } + return false +} +func rewriteValueAMD64_OpAMD64CMOVQEQconst(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (CMOVQEQconst x (InvertFlags y) [c]) + // cond: + // result: (CMOVQNEconst x y [c]) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64InvertFlags { + break + } + y := v_1.Args[0] + c := v.AuxInt + v.reset(OpAMD64CMOVQNEconst) + v.AddArg(x) + v.AddArg(y) + v.AuxInt = c + return true + } + // match: (CMOVQEQconst _ (FlagEQ) [c]) + // cond: + // result: (Const64 [c]) + for { + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagEQ { + break + } + c := v.AuxInt + v.reset(OpConst64) + v.AuxInt = c + return true + } + // match: (CMOVQEQconst x (FlagLT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVQEQconst x (FlagLT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVQEQconst x (FlagGT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVQEQconst x (FlagGT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64CMOVWEQconst(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (CMOVWEQconst x (InvertFlags y) [c]) + // cond: + // result: (CMOVWNEconst x y [c]) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64InvertFlags { + break + } + y := v_1.Args[0] + c := v.AuxInt + v.reset(OpAMD64CMOVWNEconst) + v.AddArg(x) + v.AddArg(y) + v.AuxInt = c + return true + } + // match: (CMOVWEQconst _ (FlagEQ) [c]) + // cond: + // result: (Const16 [c]) + for { + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagEQ { + break + } + c := v.AuxInt + v.reset(OpConst16) + v.AuxInt = c + return true + } + // match: (CMOVWEQconst x (FlagLT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVWEQconst x (FlagLT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVWEQconst x (FlagGT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVWEQconst x (FlagGT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + return false +} func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool { b := v.Block _ = b @@ -3026,6 +3343,72 @@ func rewriteValueAMD64_OpConvert(v *Value, config *Config) bool { } return false } +func rewriteValueAMD64_OpCtz16(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Ctz16 x) + // cond: + // result: (CMOVWEQconst (BSFW x) (CMPWconst x [0]) [16]) + for { + t := v.Type + 
x := v.Args[0]
+		v.reset(OpAMD64CMOVWEQconst)
+		v0 := b.NewValue0(v.Line, OpAMD64BSFW, t)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags)
+		v1.AddArg(x)
+		v1.AuxInt = 0
+		v.AddArg(v1)
+		v.AuxInt = 16
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpCtz32(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Ctz32 x)
+	// cond:
+	// result: (CMOVLEQconst (BSFL x) (CMPLconst x [0]) [32])
+	for {
+		t := v.Type
+		x := v.Args[0]
+		v.reset(OpAMD64CMOVLEQconst)
+		v0 := b.NewValue0(v.Line, OpAMD64BSFL, t)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Line, OpAMD64CMPLconst, TypeFlags)
+		v1.AddArg(x)
+		v1.AuxInt = 0
+		v.AddArg(v1)
+		v.AuxInt = 32
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpCtz64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Ctz64 x)
+	// cond:
+	// result: (CMOVQEQconst (BSFQ x) (CMPQconst x [0]) [64])
+	for {
+		t := v.Type
+		x := v.Args[0]
+		v.reset(OpAMD64CMOVQEQconst)
+		v0 := b.NewValue0(v.Line, OpAMD64BSFQ, t)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Line, OpAMD64CMPQconst, TypeFlags)
+		v1.AddArg(x)
+		v1.AuxInt = 0
+		v.AddArg(v1)
+		v.AuxInt = 64
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpCvt32Fto32(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
diff --git a/src/runtime/internal/sys/intrinsics.go b/src/runtime/internal/sys/intrinsics.go
new file mode 100644
index 0000000000..8feb754dbd
--- /dev/null
+++ b/src/runtime/internal/sys/intrinsics.go
@@ -0,0 +1,105 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sys
+
+// Ctz64 counts trailing (low-order) zeroes;
+// the result is 64 if x is entirely zero.
+func Ctz64(x uint64) uint64 {
+	if x&0xffffffff == 0 {
+		return 32 + uint64(Ctz32(uint32(x>>32)))
+	}
+	return uint64(Ctz32(uint32(x)))
+}
+
+// Ctz32 counts trailing (low-order) zeroes;
+// the result is 32 if x is entirely zero.
+func Ctz32(x uint32) uint32 {
+	if x&0xffff == 0 {
+		return 16 + uint32(Ctz16(uint16(x>>16)))
+	}
+	return uint32(Ctz16(uint16(x)))
+}
+
+// Ctz16 counts trailing (low-order) zeroes;
+// the result is 16 if x is entirely zero.
+func Ctz16(x uint16) uint16 {
+	if x&0xff == 0 {
+		return 8 + uint16(Ctz8(uint8(x>>8)))
+	}
+	return uint16(Ctz8(uint8(x)))
+}
+
+// Ctz8 counts trailing (low-order) zeroes;
+// the result is 8 if x is entirely zero.
+func Ctz8(x uint8) uint8 {
+	return ctzVals[x]
+}
+
+var ctzVals = [256]uint8{
+	8, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	5, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	6, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	5, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	7, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	5, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	6, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	5, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0,
+	4, 0, 1, 0, 2, 0, 1, 0,
+	3, 0, 1, 0, 2, 0, 1, 0}
+
+// Bswap64 returns its input with byte order reversed
+// 0x0102030405060708 -> 0x0807060504030201
+func Bswap64(x uint64) uint64 {
+	c8 := uint64(0xff00ff00ff00ff00)
+	a := (x & c8) >> 8
+	b := (x &^ c8) << 8
+	x = a | b
+	c16 := uint64(0xffff0000ffff0000)
+	a = (x & c16) >> 16
+	b = (x &^ c16) << 16
+	x = a | b
+	c32 := uint64(0xffffffff00000000)
+	a = (x & c32) >> 32
+	b = (x &^ c32) << 32
+	x = a | b
+	return x
+}
+
+// Bswap32 returns its input with byte order reversed
+// 0x01020304 -> 0x04030201
+func Bswap32(x uint32) uint32 {
+	c8 := uint32(0xff00ff00)
+	a := (x & c8) >> 8
+	b := (x &^ c8) << 8
+	x = a | b
+	c16 := uint32(0xffff0000)
+	a = (x & c16) >> 16
+	b = (x &^ c16) << 16
+	x = a | b
+	return x
+}
diff --git a/test/intrinsic.dir/main.go b/test/intrinsic.dir/main.go
new file mode 100644
index 0000000000..46e6cb3283
--- /dev/null
+++ b/test/intrinsic.dir/main.go
@@ -0,0 +1,109 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"fmt"
+	T "runtime/internal/sys"
+)
+
+var A = []uint64{0x0102030405060708, 0x1122334455667788}
+var B = []uint64{0x0807060504030201, 0x8877665544332211}
+
+var errors int
+
+func logf(f string, args ...interface{}) {
+	errors++
+	fmt.Printf(f, args...)
+	if errors > 100 { // 100 is enough spewage
+		panic("100 errors is plenty")
+	}
+}
+
+func test(i, x uint64) {
+	t := T.Ctz64(x) // ERROR "intrinsic substitution for Ctz64"
+	if i != t {
+		logf("Ctz64(0x%x) expected %d but got %d\n", x, i, t)
+	}
+	x = -x
+	t = T.Ctz64(x) // ERROR "intrinsic substitution for Ctz64"
+	if i != t {
+		logf("Ctz64(0x%x) expected %d but got %d\n", x, i, t)
+	}
+
+	if i <= 32 {
+		x32 := uint32(x)
+		t32 := T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
+		if uint32(i) != t32 {
+			logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
+		}
+		x32 = -x32
+		t32 = T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
+		if uint32(i) != t32 {
+			logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
+		}
+	}
+	if i <= 16 {
+		x16 := uint16(x)
+		t16 := T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
+		if uint16(i) != t16 {
+			logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
+		}
+		x16 = -x16
+		t16 = T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
+		if uint16(i) != t16 {
+			logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
+		}
+	}
+}
+
+func main() {
+	// Test Bswap first because the other test relies on it
+	// working correctly (to implement bit reversal).
+ for i := range A { + x := A[i] + y := B[i] + X := T.Bswap64(x) // ERROR "intrinsic substitution for Bswap64" + Y := T.Bswap64(y) // ERROR "intrinsic substitution for Bswap64" + if y != X { + logf("Bswap64(0x%08x) expected 0x%08x but got 0x%08x\n", x, y, X) + } + if x != Y { + logf("Bswap64(0x%08x) expected 0x%08x but got 0x%08x\n", y, x, Y) + } + + x32 := uint32(X) + y32 := uint32(Y >> 32) + + X32 := T.Bswap32(x32) // ERROR "intrinsic substitution for Bswap32" + Y32 := T.Bswap32(y32) // ERROR "intrinsic substitution for Bswap32" + if y32 != X32 { + logf("Bswap32(0x%08x) expected 0x%08x but got 0x%08x\n", x32, y32, X32) + } + if x32 != Y32 { + logf("Bswap32(0x%08x) expected 0x%08x but got 0x%08x\n", y32, x32, Y32) + } + } + + // Zero is a special case, be sure it is done right. + if T.Ctz16(0) != 16 { // ERROR "intrinsic substitution for Ctz16" + logf("ctz16(0) != 16") + } + if T.Ctz32(0) != 32 { // ERROR "intrinsic substitution for Ctz32" + logf("ctz32(0) != 32") + } + if T.Ctz64(0) != 64 { // ERROR "intrinsic substitution for Ctz64" + logf("ctz64(0) != 64") + } + + for i := uint64(0); i <= 64; i++ { + for j := uint64(1); j <= 255; j += 2 { + for k := uint64(1); k <= 65537; k += 128 { + x := (j * k) << i + test(i, x) + } + } + } +} diff --git a/test/intrinsic.go b/test/intrinsic.go new file mode 100644 index 0000000000..f77412852d --- /dev/null +++ b/test/intrinsic.go @@ -0,0 +1,8 @@ +// errorcheckandrundir -0 -d=ssa/intrinsics/debug +// +build !ppc64,!ppc64le,amd64 + +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ignored diff --git a/test/run.go b/test/run.go index 53a859dda9..ffaf4d992a 100644 --- a/test/run.go +++ b/test/run.go @@ -34,6 +34,7 @@ import ( var ( verbose = flag.Bool("v", false, "verbose. if set, parallelism is set to 1.") + keep = flag.Bool("k", false, "keep. keep temporary directory.") numParallel = flag.Int("n", runtime.NumCPU(), "number of parallel tests to run") summary = flag.Bool("summary", false, "show summary of results") showSkips = flag.Bool("show_skips", false, "show skipped tests") @@ -201,8 +202,9 @@ func compileFile(runcmd runCmd, longname string) (out []byte, err error) { return runcmd(cmd...) } -func compileInDir(runcmd runCmd, dir string, names ...string) (out []byte, err error) { +func compileInDir(runcmd runCmd, dir string, flags []string, names ...string) (out []byte, err error) { cmd := []string{"go", "tool", "compile", "-e", "-D", ".", "-I", "."} + cmd = append(cmd, flags...) if *linkshared { cmd = append(cmd, "-dynlink", "-installsuffix=dynlink") } @@ -477,6 +479,9 @@ func (t *test) run() { fallthrough case "compile", "compiledir", "build", "run", "runoutput", "rundir": t.action = action + case "errorcheckandrundir": + wantError = false // should be no error if also will run + fallthrough case "errorcheck", "errorcheckdir", "errorcheckoutput": t.action = action wantError = true @@ -501,7 +506,9 @@ func (t *test) run() { } t.makeTempDir() - defer os.RemoveAll(t.tempDir) + if !*keep { + defer os.RemoveAll(t.tempDir) + } err = ioutil.WriteFile(filepath.Join(t.tempDir, t.gofile), srcBytes, 0644) check(err) @@ -577,13 +584,13 @@ func (t *test) run() { return } for _, gofiles := range pkgs { - _, t.err = compileInDir(runcmd, longdir, gofiles...) + _, t.err = compileInDir(runcmd, longdir, flags, gofiles...) 
if t.err != nil { return } } - case "errorcheckdir": + case "errorcheckdir", "errorcheckandrundir": // errorcheck all files in lexicographic order // useful for finding importing errors longdir := filepath.Join(cwd, t.goDirName()) @@ -593,7 +600,7 @@ func (t *test) run() { return } for i, gofiles := range pkgs { - out, err := compileInDir(runcmd, longdir, gofiles...) + out, err := compileInDir(runcmd, longdir, flags, gofiles...) if i == len(pkgs)-1 { if wantError && err == nil { t.err = fmt.Errorf("compilation succeeded unexpectedly\n%s", out) @@ -615,6 +622,10 @@ func (t *test) run() { break } } + if action == "errorcheckdir" { + return + } + fallthrough case "rundir": // Compile all files in the directory in lexicographic order. @@ -626,7 +637,7 @@ func (t *test) run() { return } for i, gofiles := range pkgs { - _, err := compileInDir(runcmd, longdir, gofiles...) + _, err := compileInDir(runcmd, longdir, flags, gofiles...) if err != nil { t.err = err return @@ -774,6 +785,9 @@ func (t *test) makeTempDir() { var err error t.tempDir, err = ioutil.TempDir("", "") check(err) + if *keep { + log.Printf("Temporary directory is %s", t.tempDir) + } } func (t *test) expectedOutput() string { -- 2.48.1
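A closing note on the Clz ops, which this change adds to genericOps.go without AMD64 lowerings: BSR reports the bit index of the highest set bit rather than the count of high-order zeroes, so a Clz lowering would need a subtraction on top of the zero-input fixup -- the mismatch the commit message chose to dodge. Here is a sketch of that arithmetic, under the assumed (not fixed by this change) convention that clz64(0) == 64:

	package main

	import "fmt"

	// clz64Model shows the extra step a Clz lowering would need on AMD64:
	// BSR yields the index of the highest set bit, so CLZ is 63 - BSR,
	// plus the same zero-input fixup that the Ctz lowering already does.
	func clz64Model(x uint64) uint64 {
		if x == 0 {
			return 64 // assumed convention, analogous to Ctz64(0) == 64
		}
		bsr := uint64(0)
		for y := x >> 1; y != 0; y >>= 1 { // stands in for the BSRQ result
			bsr++
		}
		return 63 - bsr // the subtraction that BSF-based Ctz avoids
	}

	func main() {
		fmt.Println(clz64Model(0), clz64Model(1), clz64Model(1<<63)) // prints: 64 63 0
	}

Substitution can be observed with the compiler debug option the new test uses, -d=ssa/intrinsics/debug, and -- judging from the switch added to compile.go -- disabled with -d=ssa/intrinsics/off.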