(MOVLstoreconst [makeValAndOff(0,3)] destptr
(MOVLstoreconst [0] destptr mem))
-(Zero [s] destptr mem) && s > 8 && s < 16 ->
+// Strip off any fractional word zeroing.
+(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE ->
+ (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
+ (MOVQstoreconst [0] destptr mem))
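+// E.g. s=23: an 8-byte zero store at destptr, then Zero [16] at destptr+7;
+// the one-byte overlap is harmless since both stores write zeros.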
+
+// Zero small numbers of words directly.
+(Zero [16] destptr mem) && !config.useSSE ->
+ (MOVQstoreconst [makeValAndOff(0,8)] destptr
+ (MOVQstoreconst [0] destptr mem))
+(Zero [24] destptr mem) && !config.useSSE ->
+ (MOVQstoreconst [makeValAndOff(0,16)] destptr
+ (MOVQstoreconst [makeValAndOff(0,8)] destptr
+ (MOVQstoreconst [0] destptr mem)))
+(Zero [32] destptr mem) && !config.useSSE ->
+ (MOVQstoreconst [makeValAndOff(0,24)] destptr
+ (MOVQstoreconst [makeValAndOff(0,16)] destptr
+ (MOVQstoreconst [makeValAndOff(0,8)] destptr
+ (MOVQstoreconst [0] destptr mem))))
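+// Multiples of 8 beyond 32 bytes fall through to the REP STOSQ rule below,
+// via the !config.useSSE && s > 32 arm added to its condition.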
+
+(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,s-8)] destptr
(MOVQstoreconst [0] destptr mem))
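+// E.g. s=12 with SSE: two overlapping 8-byte zero stores, at offsets 4 and 0.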
// Adjust zeros to be a multiple of 16 bytes.
-(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 ->
+(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE ->
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVOstore destptr (MOVOconst [0]) mem))
-(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 ->
+(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE ->
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVQstoreconst [0] destptr mem))
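+// E.g. s=28 (28%16 == 12 > 8): a 16-byte MOVO zero store at destptr, then
+// Zero [16] at destptr+12. For s=24 (24%16 == 8), an 8-byte store suffices
+// before Zero [16] at destptr+8.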
-(Zero [16] destptr mem) ->
+(Zero [16] destptr mem) && config.useSSE ->
(MOVOstore destptr (MOVOconst [0]) mem)
-(Zero [32] destptr mem) ->
+(Zero [32] destptr mem) && config.useSSE ->
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem))
-(Zero [48] destptr mem) ->
+(Zero [48] destptr mem) && config.useSSE ->
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem)))
-(Zero [64] destptr mem) ->
+(Zero [64] destptr mem) && config.useSSE ->
(MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
// Large zeroing uses REP STOSQ.
(Zero [s] destptr mem)
- && (s > 1024 || (config.noDuffDevice && s > 64))
+ && (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
&& s%8 == 0 ->
(REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
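+// (REP STOSQ stores the quadword in AX at DI, CX times, advancing DI by 8
+// each iteration — here a zero quadword written s/8 times.)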
&& clobber(x)
-> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
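+// The pairing rule below fuses two adjacent 8-byte zero stores into a single
+// 16-byte MOVO store, so it too must be gated on config.useSSE: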
(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
+ && config.useSSE
&& x.Uses == 1
&& ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
&& ValAndOff(c).Val() == 0
case OpXor8:
return rewriteValueAMD64_OpXor8_0(v)
case OpZero:
- return rewriteValueAMD64_OpZero_0(v) || rewriteValueAMD64_OpZero_10(v)
+ return rewriteValueAMD64_OpZero_0(v) || rewriteValueAMD64_OpZero_10(v) || rewriteValueAMD64_OpZero_20(v)
case OpZeroExt16to32:
return rewriteValueAMD64_OpZeroExt16to32_0(v)
case OpZeroExt16to64:
func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
b := v.Block
_ = b
+ config := b.Func.Config
+ _ = config
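+ // (rulegen emits the blank assignment so the declaration compiles even in
+ // functions where no rule ends up consulting config.)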
// match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
// cond: ValAndOff(sc).canAdd(off)
// result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
return true
}
// match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
- // cond: x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
+ // cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
// result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
for {
c := v.AuxInt
break
}
mem := x.Args[1]
- if !(x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
+ if !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
break
}
v.reset(OpAMD64MOVOstore)
func rewriteValueAMD64_OpZero_0(v *Value) bool {
b := v.Block
_ = b
+ config := b.Func.Config
+ _ = config
// match: (Zero [0] _ mem)
// cond:
// result: mem
return true
}
// match: (Zero [s] destptr mem)
- // cond: s > 8 && s < 16
- // result: (MOVQstoreconst [makeValAndOff(0,s-8)] destptr (MOVQstoreconst [0] destptr mem))
+ // cond: s%8 != 0 && s > 8 && !config.useSSE
+ // result: (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8]) (MOVQstoreconst [0] destptr mem))
for {
s := v.AuxInt
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
- if !(s > 8 && s < 16) {
+ if !(s%8 != 0 && s > 8 && !config.useSSE) {
break
}
- v.reset(OpAMD64MOVQstoreconst)
- v.AuxInt = makeValAndOff(0, s-8)
- v.AddArg(destptr)
- v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
- v0.AuxInt = 0
+ v.reset(OpZero)
+ v.AuxInt = s - s%8
+ v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
+ v0.AuxInt = s % 8
v0.AddArg(destptr)
- v0.AddArg(mem)
v.AddArg(v0)
+ v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+ v1.AuxInt = 0
+ v1.AddArg(destptr)
+ v1.AddArg(mem)
+ v.AddArg(v1)
return true
}
return false
_ = b
config := b.Func.Config
_ = config
- typ := &b.Func.Config.Types
- _ = typ
+ // match: (Zero [16] destptr mem)
+ // cond: !config.useSSE
+ // result: (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem))
+ for {
+ if v.AuxInt != 16 {
+ break
+ }
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
+ if !(!config.useSSE) {
+ break
+ }
+ v.reset(OpAMD64MOVQstoreconst)
+ v.AuxInt = makeValAndOff(0, 8)
+ v.AddArg(destptr)
+ v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+ v0.AuxInt = 0
+ v0.AddArg(destptr)
+ v0.AddArg(mem)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (Zero [24] destptr mem)
+ // cond: !config.useSSE
+ // result: (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem)))
+ for {
+ if v.AuxInt != 24 {
+ break
+ }
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
+ if !(!config.useSSE) {
+ break
+ }
+ v.reset(OpAMD64MOVQstoreconst)
+ v.AuxInt = makeValAndOff(0, 16)
+ v.AddArg(destptr)
+ v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+ v0.AuxInt = makeValAndOff(0, 8)
+ v0.AddArg(destptr)
+ v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+ v1.AuxInt = 0
+ v1.AddArg(destptr)
+ v1.AddArg(mem)
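+ // Chain the stores through memory: v1 (offset 0) takes mem,
+ // v0 (offset 8) takes v1, and v (offset 16) takes v0.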
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (Zero [32] destptr mem)
+ // cond: !config.useSSE
+ // result: (MOVQstoreconst [makeValAndOff(0,24)] destptr (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem))))
+ for {
+ if v.AuxInt != 32 {
+ break
+ }
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
+ if !(!config.useSSE) {
+ break
+ }
+ v.reset(OpAMD64MOVQstoreconst)
+ v.AuxInt = makeValAndOff(0, 24)
+ v.AddArg(destptr)
+ v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+ v0.AuxInt = makeValAndOff(0, 16)
+ v0.AddArg(destptr)
+ v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+ v1.AuxInt = makeValAndOff(0, 8)
+ v1.AddArg(destptr)
+ v2 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+ v2.AuxInt = 0
+ v2.AddArg(destptr)
+ v2.AddArg(mem)
+ v1.AddArg(v2)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (Zero [s] destptr mem)
+ // cond: s > 8 && s < 16 && config.useSSE
+ // result: (MOVQstoreconst [makeValAndOff(0,s-8)] destptr (MOVQstoreconst [0] destptr mem))
+ for {
+ s := v.AuxInt
+ _ = v.Args[1]
+ destptr := v.Args[0]
+ mem := v.Args[1]
+ if !(s > 8 && s < 16 && config.useSSE) {
+ break
+ }
+ v.reset(OpAMD64MOVQstoreconst)
+ v.AuxInt = makeValAndOff(0, s-8)
+ v.AddArg(destptr)
+ v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+ v0.AuxInt = 0
+ v0.AddArg(destptr)
+ v0.AddArg(mem)
+ v.AddArg(v0)
+ return true
+ }
// match: (Zero [s] destptr mem)
- // cond: s%16 != 0 && s > 16 && s%16 > 8
+ // cond: s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE
// result: (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16]) (MOVOstore destptr (MOVOconst [0]) mem))
for {
s := v.AuxInt
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
- if !(s%16 != 0 && s > 16 && s%16 > 8) {
+ if !(s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE) {
break
}
v.reset(OpZero)
return true
}
// match: (Zero [s] destptr mem)
- // cond: s%16 != 0 && s > 16 && s%16 <= 8
+ // cond: s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE
// result: (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16]) (MOVQstoreconst [0] destptr mem))
for {
s := v.AuxInt
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
- if !(s%16 != 0 && s > 16 && s%16 <= 8) {
+ if !(s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE) {
break
}
v.reset(OpZero)
return true
}
// match: (Zero [16] destptr mem)
- // cond:
+ // cond: config.useSSE
// result: (MOVOstore destptr (MOVOconst [0]) mem)
for {
if v.AuxInt != 16 {
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
+ if !(config.useSSE) {
+ break
+ }
v.reset(OpAMD64MOVOstore)
v.AddArg(destptr)
v0 := b.NewValue0(v.Pos, OpAMD64MOVOconst, types.TypeInt128)
return true
}
// match: (Zero [32] destptr mem)
- // cond:
+ // cond: config.useSSE
// result: (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem))
for {
if v.AuxInt != 32 {
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
+ if !(config.useSSE) {
+ break
+ }
v.reset(OpAMD64MOVOstore)
v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
v0.AuxInt = 16
return true
}
// match: (Zero [48] destptr mem)
- // cond:
+ // cond: config.useSSE
// result: (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem)))
for {
if v.AuxInt != 48 {
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
+ if !(config.useSSE) {
+ break
+ }
v.reset(OpAMD64MOVOstore)
v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
v0.AuxInt = 32
return true
}
// match: (Zero [64] destptr mem)
- // cond:
+ // cond: config.useSSE
// result: (MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0]) (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem))))
for {
if v.AuxInt != 64 {
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
+ if !(config.useSSE) {
+ break
+ }
v.reset(OpAMD64MOVOstore)
v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
v0.AuxInt = 48
v.AddArg(v2)
return true
}
+ return false
+}
+func rewriteValueAMD64_OpZero_20(v *Value) bool {
+ b := v.Block
+ _ = b
+ config := b.Func.Config
+ _ = config
+ typ := &b.Func.Config.Types
+ _ = typ
// match: (Zero [s] destptr mem)
// cond: s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice
// result: (DUFFZERO [s] destptr (MOVOconst [0]) mem)
return true
}
// match: (Zero [s] destptr mem)
- // cond: (s > 1024 || (config.noDuffDevice && s > 64)) && s%8 == 0
+ // cond: (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0
// result: (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
for {
s := v.AuxInt
_ = v.Args[1]
destptr := v.Args[0]
mem := v.Args[1]
- if !((s > 1024 || (config.noDuffDevice && s > 64)) && s%8 == 0) {
+ if !((s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0) {
break
}
v.reset(OpAMD64REPSTOSQ)