The Go compiler always generates Loong64 binaries that can run on any
processor with an LA364, LA464, LA664 or newer core, and all of these
processors support unaligned memory access [1].
goos: linux
goarch: loong64
pkg: strings
cpu: Loongson-3A6000 @ 2500.00MHz
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
StringPrefix3 4.0040n ± 0% 0.4003n ± 0% -90.00% (p=0.000 n=10)
StringPrefix5 4.0040n ± 0% 0.4003n ± 0% -90.00% (p=0.000 n=10)
StringPrefix6 3.6030n ± 0% 0.4002n ± 0% -88.89% (p=0.000 n=10)
StringPrefix7 4.0040n ± 0% 0.4002n ± 0% -90.00% (p=0.000 n=10)
geomean 3.900n 0.4003n -89.74%
goos: linux
goarch: loong64
pkg: strings
cpu: Loongson-3A5000-HV @ 2500.00MHz
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
StringPrefix3 5.6160n ± 0% 0.4011n ± 0% -92.86% (p=0.000 n=10)
StringPrefix5 5.6180n ± 0% 0.4011n ± 0% -92.86% (p=0.000 n=10)
StringPrefix6 5.2170n ± 0% 0.4011n ± 0% -92.31% (p=0.000 n=10)
StringPrefix7 5.6170n ± 0% 0.4009n ± 0% -92.86% (p=0.000 n=10)
geomean 5.514n 0.4010n -92.73%
goos: linux
goarch: loong64
pkg: strings
cpu: Loongson-3B6000M @ 2400.00MHz
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
StringPrefix3 5.0060n ± 0% 0.4223n ± 1% -91.56% (p=0.000 n=10)
StringPrefix5 4.5890n ± 0% 0.4214n ± 0% -90.82% (p=0.000 n=10)
StringPrefix6 4.5890n ± 0% 0.4190n ± 1% -90.87% (p=0.000 n=10)
StringPrefix7 4.5890n ± 0% 0.4226n ± 1% -90.79% (p=0.000 n=10)
geomean 4.690n 0.4213n -91.02%
[1]: https://go.dev/wiki/MinimumRequirements#loong64
Change-Id: I1870080e0122a7d136685e3045699d0cf1e4194d
Reviewed-on: https://go-review.googlesource.com/c/go/+/742260
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
PtrSize: 8,
RegSize: 8,
MinLC: 4,
- Alignment: 8, // Unaligned accesses are not guaranteed to be fast
+ Alignment: 1,
CanMergeLoads: true,
CanJumpTable: true,
HasLR: true,
b := string("Z")
// amd64:-".*memequal"
// arm64:-".*memequal"
+ // loong64:-".*memequal"
// ppc64x:-".*memequal"
return a == b
}
b := string("Z")
// amd64:-".*memequal"
// arm64:-".*memequal"
+ // loong64:-".*memequal"
// ppc64x:-".*memequal"
return a[:1] == b
}
b := string("ZZ")
// amd64:-".*memequal"
// arm64:-".*memequal"
+ // loong64:-".*memequal"
// ppc64x:-".*memequal"
return a == b
}
b := string("ZZ")
// amd64:-".*memequal"
// arm64:-".*memequal"
+ // loong64:-".*memequal"
// ppc64x:-".*memequal"
return a[:2] == b
}
b := string("ZZZZ")
// amd64:-".*memequal"
// arm64:-".*memequal"
+ // loong64:-".*memequal"
// ppc64x:-".*memequal"
return a == b
}
b := string("ZZZZ")
// amd64:-".*memequal"
// arm64:-".*memequal"
+ // loong64:-".*memequal"
// ppc64x:-".*memequal"
return a[:4] == b
}
b := string("ZZZZZZZZ")
// amd64:-".*memequal"
// arm64:-".*memequal"
+ // loong64:-".*memequal"
// ppc64x:-".*memequal"
return a == b
}
b := string("ZZZZZZZZ")
// amd64:-".*memequal"
// arm64:-".*memequal"
+ // loong64:-".*memequal"
// ppc64x:-".*memequal"
return a[:8] == b
}