`,
pos: []string{"\tXORPS\tX., X", "\tMOVUPS\tX., \\(.*\\)", "\tMOVQ\t\\$0, 16\\(.*\\)", "\tCALL\truntime\\.gcWriteBarrier\\(SB\\)"},
},
- // Rotate tests
- {
- fn: `
- func f20(x uint64) uint64 {
- return x<<7 | x>>57
- }
- `,
- pos: []string{"\tROLQ\t[$]7,"},
- },
- {
- fn: `
- func f21(x uint64) uint64 {
- return x<<7 + x>>57
- }
- `,
- pos: []string{"\tROLQ\t[$]7,"},
- },
- {
- fn: `
- func f22(x uint64) uint64 {
- return x<<7 ^ x>>57
- }
- `,
- pos: []string{"\tROLQ\t[$]7,"},
- },
- {
- fn: `
- func f23(x uint32) uint32 {
- return x<<7 + x>>25
- }
- `,
- pos: []string{"\tROLL\t[$]7,"},
- },
- {
- fn: `
- func f24(x uint32) uint32 {
- return x<<7 | x>>25
- }
- `,
- pos: []string{"\tROLL\t[$]7,"},
- },
- {
- fn: `
- func f25(x uint32) uint32 {
- return x<<7 ^ x>>25
- }
- `,
- pos: []string{"\tROLL\t[$]7,"},
- },
- {
- fn: `
- func f26(x uint16) uint16 {
- return x<<7 + x>>9
- }
- `,
- pos: []string{"\tROLW\t[$]7,"},
- },
- {
- fn: `
- func f27(x uint16) uint16 {
- return x<<7 | x>>9
- }
- `,
- pos: []string{"\tROLW\t[$]7,"},
- },
- {
- fn: `
- func f28(x uint16) uint16 {
- return x<<7 ^ x>>9
- }
- `,
- pos: []string{"\tROLW\t[$]7,"},
- },
- {
- fn: `
- func f29(x uint8) uint8 {
- return x<<7 + x>>1
- }
- `,
- pos: []string{"\tROLB\t[$]7,"},
- },
- {
- fn: `
- func f30(x uint8) uint8 {
- return x<<7 | x>>1
- }
- `,
- pos: []string{"\tROLB\t[$]7,"},
- },
- {
- fn: `
- func f31(x uint8) uint8 {
- return x<<7 ^ x>>1
- }
- `,
- pos: []string{"\tROLB\t[$]7,"},
- },
- // Rotate after inlining (see issue 18254).
- {
- fn: `
- func f32(x uint32) uint32 {
- return g(x, 7)
- }
- func g(x uint32, k uint) uint32 {
- return x<<k | x>>(32-k)
- }
- `,
- pos: []string{"\tROLL\t[$]7,"},
- },
{
fn: `
func f33(m map[int]int) int {
}`,
pos: []string{"\tCMPQ\t[A-Z]"},
},
- // Non-constant rotate
- {
- fn: `func rot64l(x uint64, y int) uint64 {
- z := uint(y & 63)
- return x << z | x >> (64-z)
- }`,
- pos: []string{"\tROLQ\t"},
- },
- {
- fn: `func rot64r(x uint64, y int) uint64 {
- z := uint(y & 63)
- return x >> z | x << (64-z)
- }`,
- pos: []string{"\tRORQ\t"},
- },
- {
- fn: `func rot32l(x uint32, y int) uint32 {
- z := uint(y & 31)
- return x << z | x >> (32-z)
- }`,
- pos: []string{"\tROLL\t"},
- },
- {
- fn: `func rot32r(x uint32, y int) uint32 {
- z := uint(y & 31)
- return x >> z | x << (32-z)
- }`,
- pos: []string{"\tRORL\t"},
- },
- {
- fn: `func rot16l(x uint16, y int) uint16 {
- z := uint(y & 15)
- return x << z | x >> (16-z)
- }`,
- pos: []string{"\tROLW\t"},
- },
- {
- fn: `func rot16r(x uint16, y int) uint16 {
- z := uint(y & 15)
- return x >> z | x << (16-z)
- }`,
- pos: []string{"\tRORW\t"},
- },
- {
- fn: `func rot8l(x uint8, y int) uint8 {
- z := uint(y & 7)
- return x << z | x >> (8-z)
- }`,
- pos: []string{"\tROLB\t"},
- },
- {
- fn: `func rot8r(x uint8, y int) uint8 {
- z := uint(y & 7)
- return x >> z | x << (8-z)
- }`,
- pos: []string{"\tRORB\t"},
- },
// Check that array compare uses 2/4/8 byte compares
{
fn: `
}
var linuxS390XTests = []*asmTest{
- {
- fn: `
- func f8(x uint64) uint64 {
- return x<<7 + x>>57
- }
- `,
- pos: []string{"\tRLLG\t[$]7,"},
- },
- {
- fn: `
- func f9(x uint64) uint64 {
- return x<<7 | x>>57
- }
- `,
- pos: []string{"\tRLLG\t[$]7,"},
- },
- {
- fn: `
- func f10(x uint64) uint64 {
- return x<<7 ^ x>>57
- }
- `,
- pos: []string{"\tRLLG\t[$]7,"},
- },
- {
- fn: `
- func f11(x uint32) uint32 {
- return x<<7 + x>>25
- }
- `,
- pos: []string{"\tRLL\t[$]7,"},
- },
- {
- fn: `
- func f12(x uint32) uint32 {
- return x<<7 | x>>25
- }
- `,
- pos: []string{"\tRLL\t[$]7,"},
- },
- {
- fn: `
- func f13(x uint32) uint32 {
- return x<<7 ^ x>>25
- }
- `,
- pos: []string{"\tRLL\t[$]7,"},
- },
// Fused multiply-add/sub instructions.
{
fn: `
}
var linuxARMTests = []*asmTest{
- {
- fn: `
- func f0(x uint32) uint32 {
- return x<<7 + x>>25
- }
- `,
- pos: []string{"\tMOVW\tR[0-9]+@>25,"},
- },
- {
- fn: `
- func f1(x uint32) uint32 {
- return x<<7 | x>>25
- }
- `,
- pos: []string{"\tMOVW\tR[0-9]+@>25,"},
- },
- {
- fn: `
- func f2(x uint32) uint32 {
- return x<<7 ^ x>>25
- }
- `,
- pos: []string{"\tMOVW\tR[0-9]+@>25,"},
- },
{
// make sure assembly output has matching offset and base register.
fn: `
}
var linuxARM64Tests = []*asmTest{
- {
- fn: `
- func f0(x uint64) uint64 {
- return x<<7 + x>>57
- }
- `,
- pos: []string{"\tROR\t[$]57,"},
- },
- {
- fn: `
- func f1(x uint64) uint64 {
- return x<<7 | x>>57
- }
- `,
- pos: []string{"\tROR\t[$]57,"},
- },
- {
- fn: `
- func f2(x uint64) uint64 {
- return x<<7 ^ x>>57
- }
- `,
- pos: []string{"\tROR\t[$]57,"},
- },
- {
- fn: `
- func f3(x uint32) uint32 {
- return x<<7 + x>>25
- }
- `,
- pos: []string{"\tRORW\t[$]25,"},
- },
- {
- fn: `
- func f4(x uint32) uint32 {
- return x<<7 | x>>25
- }
- `,
- pos: []string{"\tRORW\t[$]25,"},
- },
- {
- fn: `
- func f5(x uint32) uint32 {
- return x<<7 ^ x>>25
- }
- `,
- pos: []string{"\tRORW\t[$]25,"},
- },
{
fn: `
func $(x, y uint32) uint32 {
`,
pos: []string{"\tFMSUBS\t"},
},
- {
- fn: `
- func f4(x uint32) uint32 {
- return x<<7 | x>>25
- }
- `,
- pos: []string{"\tROTLW\t"},
- },
- {
- fn: `
- func f5(x uint32) uint32 {
- return x<<7 + x>>25
- }
- `,
- pos: []string{"\tROTLW\t"},
- },
- {
- fn: `
- func f6(x uint32) uint32 {
- return x<<7 ^ x>>25
- }
- `,
- pos: []string{"\tROTLW\t"},
- },
- {
- fn: `
- func f7(x uint64) uint64 {
- return x<<7 | x>>57
- }
- `,
- pos: []string{"\tROTL\t"},
- },
- {
- fn: `
- func f8(x uint64) uint64 {
- return x<<7 + x>>57
- }
- `,
- pos: []string{"\tROTL\t"},
- },
- {
- fn: `
- func f9(x uint64) uint64 {
- return x<<7 ^ x>>57
- }
- `,
- pos: []string{"\tROTL\t"},
- },
{
// check that stack store is optimized away
fn: `
package codegen
+// ------------------- //
+// const rotates //
+// ------------------- //
+
+func rot64(x uint64) uint64 {
+ var a uint64
+
+ // amd64:"ROLQ\t[$]7"
+ // arm64:"ROR\t[$]57"
+ // s390x:"RLLG\t[$]7"
+ // ppc64le:"ROTL\t[$]7"
+ a += x<<7 | x>>57
+
+ // amd64:"ROLQ\t[$]8"
+ // arm64:"ROR\t[$]56"
+ // s390x:"RLLG\t[$]8"
+ // ppc64le:"ROTL\t[$]8"
+ a += x<<8 + x>>56
+
+ // amd64:"ROLQ\t[$]9"
+ // arm64:"ROR\t[$]55"
+ // s390x:"RLLG\t[$]9"
+ // ppc64le:"ROTL\t[$]9"
+ a += x<<9 ^ x>>55
+
+ return a
+}
+
func rot32(x uint32) uint32 {
var a uint32
- a += x<<7 | x>>25 // amd64:"ROLL.*[$]7" arm:"MOVW.*@>25"
- a += x<<8 + x>>24 // amd64:`ROLL.*\$8` arm:"MOVW.*@>24"
- a += x<<9 ^ x>>23 // amd64:"ROLL.*\\$9" arm:"MOVW.*@>23"
+
+ // amd64:"ROLL\t[$]7"
+ // arm:"MOVW\tR\\d+@>25"
+ // arm64:"RORW\t[$]25"
+ // s390x:"RLL\t[$]7"
+ // ppc64le:"ROTLW\t[$]7"
+ a += x<<7 | x>>25
+
+ // amd64:`ROLL\t[$]8`
+ // arm:"MOVW\tR\\d+@>24"
+ // arm64:"RORW\t[$]24"
+ // s390x:"RLL\t[$]8"
+ // ppc64le:"ROTLW\t[$]8"
+ a += x<<8 + x>>24
+
+ // amd64:"ROLL\t[$]9"
+ // arm:"MOVW\tR\\d+@>23"
+ // arm64:"RORW\t[$]23"
+ // s390x:"RLL\t[$]9"
+ // ppc64le:"ROTLW\t[$]9"
+ a += x<<9 ^ x>>23
+
return a
}
-func rot64(x uint64) uint64 {
+func rot16(x uint16) uint16 {
+ var a uint16
+
+ // amd64:"ROLW\t[$]7"
+ a += x<<7 | x>>9
+
+ // amd64:`ROLW\t[$]8`
+ a += x<<8 + x>>8
+
+ // amd64:"ROLW\t[$]9"
+ a += x<<9 ^ x>>7
+
+ return a
+}
+
+func rot8(x uint8) uint8 {
+ var a uint8
+
+ // amd64:"ROLB\t[$]5"
+ a += x<<5 | x>>3
+
+ // amd64:`ROLB\t[$]6`
+ a += x<<6 + x>>2
+
+ // amd64:"ROLB\t[$]7"
+ a += x<<7 ^ x>>1
+
+ return a
+}
+
+// ----------------------- //
+// non-const rotates //
+// ----------------------- //
+
+func rot64nc(x uint64, z uint) uint64 {
var a uint64
- a += x<<7 | x>>57 // amd64:"ROL"
- a += x<<8 + x>>56 // amd64:"ROL"
- a += x<<9 ^ x>>55 // amd64:"ROL"
+
+ z &= 63
+
+ // amd64:"ROLQ"
+ a += x<<z | x>>(64-z)
+
+ // amd64:"RORQ"
+ a += x>>z | x<<(64-z)
+
return a
}
+
+func rot32nc(x uint32, z uint) uint32 {
+ var a uint32
+
+ z &= 31
+
+ // amd64:"ROLL"
+ a += x<<z | x>>(32-z)
+
+ // amd64:"RORL"
+ a += x>>z | x<<(32-z)
+
+ return a
+}
+
+func rot16nc(x uint16, z uint) uint16 {
+ var a uint16
+
+ z &= 15
+
+ // amd64:"ROLW"
+ a += x<<z | x>>(16-z)
+
+ // amd64:"RORW"
+ a += x>>z | x<<(16-z)
+
+ return a
+}
+
+func rot8nc(x uint8, z uint) uint8 {
+ var a uint8
+
+ z &= 7
+
+ // amd64:"ROLB"
+ a += x<<z | x>>(8-z)
+
+ // amd64:"RORB"
+ a += x>>z | x<<(8-z)
+
+ return a
+}
+
+// Issue 18254: rotate after inlining
+func f32(x uint32) uint32 {
+ // amd64:"ROLL\t[$]7"
+ return rot32nc(x, 7)
+}