ADD(Q|L) generally has twice the throughput of SHL(Q|L) by one.
Came up in CL 626998.
Throughput by arch (reciprocal throughput, lower is better):

Zen 4:
  SHL (R64, 1): 0.5
  ADD (R64, R64): 0.25

Intel Alder Lake:
  SHL (R64, 1): 0.5
  ADD (R64, R64): 0.2

Intel Haswell:
  SHL (R64, 1): 0.5
  ADD (R64, R64): 0.25
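
As a minimal illustration of the main rewrite (the function name double
is only for this sketch; the behavior matches the lshConst64x1 codegen
test added further down), a plain shift by one such as:

func double(x int64) int64 {
	return x << 1
}

is now compiled to

	ADDQ AX, AX

instead of

	SHLQ $1, AX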
Also include a minor optimization:

  (x + x) << c  ->  x << (c + 1)
Before this change, the code:

func addShift(x int64) int64 {
	return (x + x) << 1
}

compiled to two instructions:

	ADDQ AX, AX
	SHLQ $1, AX

but we can now do it with a single shift:

	SHLQ $2, AX
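
The same folding applies to larger shift counts; as a sketch (addShift2
is a hypothetical name, the behavior is covered by the lshConst64x2Add
codegen test below):

func addShift2(x int64) int64 {
	return (x + x) << 2
}

now compiles to a single

	SHLQ $3, AX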
Add a codegen test for clearing the last bit.
compilecmp linux/amd64:
math
math.sqrt 243 -> 242 (-0.41%)
math [cmd/compile]
math.sqrt 243 -> 242 (-0.41%)
runtime
runtime.selectgo 5455 -> 5445 (-0.18%)
runtime.sysargs 665 -> 662 (-0.45%)
runtime.isPinned 145 -> 141 (-2.76%)
runtime.atoi64 198 -> 194 (-2.02%)
runtime.setPinned 714 -> 709 (-0.70%)
runtime [cmd/compile]
runtime.sysargs 665 -> 662 (-0.45%)
runtime.setPinned 714 -> 709 (-0.70%)
runtime.atoi64 198 -> 194 (-2.02%)
runtime.isPinned 145 -> 141 (-2.76%)
strconv
strconv.computeBounds 109 -> 107 (-1.83%)
strconv.FormatInt 201 -> 197 (-1.99%)
strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%)
strconv.small 144 -> 134 (-6.94%)
strconv.AppendInt 357 -> 344 (-3.64%)
strconv.ryuDigits32 490 -> 488 (-0.41%)
strconv.AppendUint 342 -> 340 (-0.58%)
strconv [cmd/compile]
strconv.FormatInt 201 -> 197 (-1.99%)
strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%)
strconv.ryuDigits32 490 -> 488 (-0.41%)
strconv.AppendUint 342 -> 340 (-0.58%)
strconv.computeBounds 109 -> 107 (-1.83%)
strconv.small 144 -> 134 (-6.94%)
strconv.AppendInt 357 -> 344 (-3.64%)
image
image.Rectangle.Inset 101 -> 97 (-3.96%)
regexp/syntax
regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%)
regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%)
regexp/syntax.ranges.Less 153 -> 150 (-1.96%)
regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%)
time
time.Time.Before 179 -> 161 (-10.06%)
time.Time.Compare 189 -> 166 (-12.17%)
time.Time.Sub 444 -> 425 (-4.28%)
time.Time.UnixMicro 106 -> 95 (-10.38%)
time.div 592 -> 587 (-0.84%)
time.Time.UnixNano 85 -> 78 (-8.24%)
time.(*Time).UnixMilli 141 -> 140 (-0.71%)
time.Time.UnixMilli 106 -> 95 (-10.38%)
time.(*Time).UnixMicro 141 -> 140 (-0.71%)
time.Time.After 179 -> 161 (-10.06%)
time.Time.Equal 170 -> 150 (-11.76%)
time.Time.AppendBinary 766 -> 757 (-1.17%)
time.Time.IsZero 74 -> 66 (-10.81%)
time.(*Time).UnixNano 124 -> 113 (-8.87%)
time.(*Time).IsZero 113 -> 108 (-4.42%)
regexp
regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%)
regexp.QuoteMeta 485 -> 469 (-3.30%)
regexp/syntax [cmd/compile]
regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%)
regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%)
regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%)
regexp/syntax.ranges.Less 153 -> 150 (-1.96%)
encoding/base64
encoding/base64.decodedLen 92 -> 90 (-2.17%)
encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%)
time [cmd/compile]
time.(*Time).IsZero 113 -> 108 (-4.42%)
time.Time.IsZero 74 -> 66 (-10.81%)
time.(*Time).UnixNano 124 -> 113 (-8.87%)
time.Time.UnixMilli 106 -> 95 (-10.38%)
time.Time.Equal 170 -> 150 (-11.76%)
time.Time.UnixMicro 106 -> 95 (-10.38%)
time.(*Time).UnixMicro 141 -> 140 (-0.71%)
time.Time.Before 179 -> 161 (-10.06%)
time.Time.UnixNano 85 -> 78 (-8.24%)
time.Time.AppendBinary 766 -> 757 (-1.17%)
time.div 592 -> 587 (-0.84%)
time.Time.After 179 -> 161 (-10.06%)
time.Time.Compare 189 -> 166 (-12.17%)
time.(*Time).UnixMilli 141 -> 140 (-0.71%)
time.Time.Sub 444 -> 425 (-4.28%)
index/suffixarray
index/suffixarray.sais_8_32 1677 -> 1645 (-1.91%)
index/suffixarray.sais_32 1677 -> 1645 (-1.91%)
index/suffixarray.sais_64 1677 -> 1654 (-1.37%)
index/suffixarray.sais_8_64 1677 -> 1654 (-1.37%)
index/suffixarray.writeInt 249 -> 247 (-0.80%)
os
os.Expand 1070 -> 1051 (-1.78%)
os.Chtimes 787 -> 774 (-1.65%)
regexp [cmd/compile]
regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%)
regexp.QuoteMeta 485 -> 469 (-3.30%)
encoding/base64 [cmd/compile]
encoding/base64.decodedLen 92 -> 90 (-2.17%)
encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%)
encoding/hex
encoding/hex.Encode 138 -> 136 (-1.45%)
encoding/hex.(*decoder).Read 830 -> 824 (-0.72%)
crypto/des
crypto/des.initFeistelBox 235 -> 229 (-2.55%)
crypto/des.cryptBlock 549 -> 538 (-2.00%)
os [cmd/compile]
os.Chtimes 787 -> 774 (-1.65%)
os.Expand 1070 -> 1051 (-1.78%)
math/big
math/big.newFloat 238 -> 223 (-6.30%)
math/big.nat.mul 2138 -> 2122 (-0.75%)
math/big.karatsubaSqr 1372 -> 1369 (-0.22%)
math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%)
math/big.basicSqr 1032 -> 1017 (-1.45%)
cmd/vendor/golang.org/x/sys/unix
cmd/vendor/golang.org/x/sys/unix.TimeToTimespec 72 -> 66 (-8.33%)
encoding/json
encoding/json.Indent 404 -> 403 (-0.25%)
encoding/json.MarshalIndent 303 -> 297 (-1.98%)
testing
testing.(*T).Deadline 84 -> 82 (-2.38%)
testing.(*M).Run 3545 -> 3525 (-0.56%)
archive/zip
archive/zip.headerFileInfo.ModTime 229 -> 223 (-2.62%)
encoding/gob
encoding/gob.(*encoderState).encodeInt 474 -> 469 (-1.05%)
crypto/elliptic
crypto/elliptic.Marshal 728 -> 714 (-1.92%)
debug/buildinfo
debug/buildinfo.readString 325 -> 315 (-3.08%)
image/png
image/png.(*decoder).readImagePass 10866 -> 10834 (-0.29%)
archive/tar
archive/tar.Header.allowedFormats.func3 1768 -> 1736 (-1.81%)
archive/tar.formatPAXTime 389 -> 358 (-7.97%)
archive/tar.(*Writer).writeGNUHeader 741 -> 727 (-1.89%)
archive/tar.readGNUSparseMap0x1 709 -> 695 (-1.97%)
archive/tar.(*Writer).templateV7Plus 915 -> 909 (-0.66%)
crypto/internal/cryptotest
crypto/internal/cryptotest.TestHash.func4 890 -> 879 (-1.24%)
crypto/internal/cryptotest.TestStream.func6.1 646 -> 645 (-0.15%)
crypto/internal/cryptotest.testCipher.func3 1300 -> 1289 (-0.85%)
internal/pkgbits
internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%)
internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%)
testing/quick
testing/quick.(*Config).getRand 316 -> 315 (-0.32%)
log/slog
log/slog.TimeValue 489 -> 479 (-2.04%)
runtime/pprof
runtime/pprof.(*profileBuilder).build 2341 -> 2322 (-0.81%)
internal/coverage/cfile
internal/coverage/cfile.(*emitState).openMetaFile 824 -> 822 (-0.24%)
internal/coverage/cfile.(*emitState).openCounterFile 904 -> 892 (-1.33%)
cmd/internal/objabi
cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%)
crypto/ecdsa
crypto/ecdsa.pointFromAffine 1162 -> 1144 (-1.55%)
net
net.minNonzeroTime 313 -> 308 (-1.60%)
net.cgoLookupAddrPTR 812 -> 797 (-1.85%)
net.(*IPNet).String 851 -> 827 (-2.82%)
net.IP.AppendText 488 -> 471 (-3.48%)
net.IPMask.String 281 -> 270 (-3.91%)
net.partialDeadline 374 -> 366 (-2.14%)
net.hexString 249 -> 240 (-3.61%)
net.IP.String 454 -> 453 (-0.22%)
internal/fuzz
internal/fuzz.newPcgRand 240 -> 234 (-2.50%)
crypto/x509
crypto/x509.(*Certificate).isValid 2642 -> 2611 (-1.17%)
cmd/internal/obj/s390x
cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%)
encoding/hex [cmd/compile]
encoding/hex.(*decoder).Read 830 -> 824 (-0.72%)
encoding/hex.Encode 138 -> 136 (-1.45%)
cmd/internal/objabi [cmd/compile]
cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%)
math/big [cmd/compile]
math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%)
math/big.nat.mul 2138 -> 2122 (-0.75%)
math/big.karatsubaSqr 1372 -> 1369 (-0.22%)
math/big.basicSqr 1032 -> 1017 (-1.45%)
math/big.newFloat 238 -> 223 (-6.30%)
encoding/json [cmd/compile]
encoding/json.MarshalIndent 303 -> 297 (-1.98%)
encoding/json.Indent 404 -> 403 (-0.25%)
cmd/covdata
main.(*metaMerge).emitCounters 985 -> 973 (-1.22%)
runtime/pprof [cmd/compile]
runtime/pprof.(*profileBuilder).build 2341 -> 2322 (-0.81%)
cmd/compile/internal/syntax
cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%)
cmd/dist
main.runInstall 19081 -> 19049 (-0.17%)
crypto/tls
crypto/tls.extractPadding 176 -> 175 (-0.57%)
slices.Clone[[]crypto/tls.SignatureScheme,crypto/tls.SignatureScheme] 253 -> 247 (-2.37%)
slices.Clone[[]uint16,uint16] 253 -> 247 (-2.37%)
slices.Clone[[]crypto/tls.CurveID,crypto/tls.CurveID] 253 -> 247 (-2.37%)
crypto/tls.(*Config).cipherSuites 335 -> 326 (-2.69%)
slices.DeleteFunc[go.shape.[]crypto/tls.CurveID,go.shape.uint16] 437 -> 434 (-0.69%)
crypto/tls.dial 1349 -> 1339 (-0.74%)
slices.DeleteFunc[go.shape.[]uint16,go.shape.uint16] 437 -> 434 (-0.69%)
internal/pkgbits [cmd/compile]
internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%)
internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%)
cmd/compile/internal/syntax [cmd/compile]
cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%)
cmd/internal/obj/s390x [cmd/compile]
cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%)
cmd/go/internal/trace
cmd/go/internal/trace.Flow 910 -> 886 (-2.64%)
cmd/go/internal/trace.(*Span).Done 311 -> 304 (-2.25%)
cmd/go/internal/trace.StartSpan 620 -> 615 (-0.81%)
cmd/internal/script
cmd/internal/script.(*Engine).Execute.func2 534 -> 528 (-1.12%)
cmd/link/internal/loader
cmd/link/internal/loader.(*Loader).SetSymSect 344 -> 338 (-1.74%)
net/http
net/http.(*Transport).queueForIdleConn 1797 -> 1766 (-1.73%)
net/http.(*Transport).getConn 2149 -> 2131 (-0.84%)
net/http.(*http2ClientConn).tooIdleLocked 207 -> 197 (-4.83%)
net/http.(*http2responseWriter).SetWriteDeadline.func1 520 -> 508 (-2.31%)
net/http.(*Cookie).Valid 837 -> 818 (-2.27%)
net/http.(*http2responseWriter).SetReadDeadline 373 -> 357 (-4.29%)
net/http.checkIfRange 701 -> 690 (-1.57%)
net/http.(*http2SettingsFrame).Value 325 -> 298 (-8.31%)
net/http.(*http2SettingsFrame).HasDuplicates 777 -> 767 (-1.29%)
net/http.(*Server).Serve 1746 -> 1739 (-0.40%)
net/http.http2traceGotConn 569 -> 556 (-2.28%)
net/http/pprof
net/http/pprof.collectProfile 242 -> 239 (-1.24%)
cmd/compile/internal/coverage
cmd/compile/internal/coverage.metaHashAndLen 439 -> 438 (-0.23%)
cmd/vendor/golang.org/x/telemetry/internal/upload
cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).findWork 4570 -> 4540 (-0.66%)
cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).reports 3604 -> 3572 (-0.89%)
cmd/compile/internal/coverage [cmd/compile]
cmd/compile/internal/coverage.metaHashAndLen 439 -> 438 (-0.23%)
cmd/vendor/golang.org/x/text/language
cmd/vendor/golang.org/x/text/language.regionGroupDist 287 -> 284 (-1.05%)
cmd/go/internal/vcweb
cmd/go/internal/vcweb.(*Server).overview.func1 1045 -> 1041 (-0.38%)
cmd/go/internal/vcs
cmd/go/internal/vcs.expand 761 -> 741 (-2.63%)
cmd/compile/internal/inline/inlheur
slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%)
cmd/compile/internal/inline/inlheur [cmd/compile]
slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%)
cmd/go/internal/modfetch/codehost
cmd/go/internal/modfetch/codehost.bzrParseStat 2217 -> 2213 (-0.18%)
cmd/link/internal/ld
cmd/link/internal/ld.decodetypeStructFieldCount 157 -> 152 (-3.18%)
cmd/link/internal/ld.(*Link).address 12559 -> 12495 (-0.51%)
cmd/link/internal/ld.(*dodataState).allocateDataSections 18345 -> 18205 (-0.76%)
cmd/link/internal/ld.elfshreloc 618 -> 616 (-0.32%)
cmd/link/internal/ld.(*deadcodePass).decodetypeMethods 794 -> 779 (-1.89%)
cmd/link/internal/ld.(*dodataState).assignDsymsToSection 668 -> 663 (-0.75%)
cmd/link/internal/ld.relocSectFn 285 -> 284 (-0.35%)
cmd/link/internal/ld.decodetypeIfaceMethodCount 146 -> 144 (-1.37%)
cmd/link/internal/ld.decodetypeArrayLen 157 -> 152 (-3.18%)
cmd/link/internal/arm64
cmd/link/internal/arm64.gensymlate.func1 895 -> 888 (-0.78%)
cmd/go/internal/modload
cmd/go/internal/modload.queryProxy.func3 1029 -> 1012 (-1.65%)
cmd/go/internal/load
cmd/go/internal/load.(*Package).setBuildInfo 8453 -> 8447 (-0.07%)
cmd/go/internal/clean
cmd/go/internal/clean.runClean 2120 -> 2104 (-0.75%)
cmd/compile/internal/ssa
cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%)
cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%)
cmd/compile/internal/ssa.(*debugState).buildLocationLists 3326 -> 3294 (-0.96%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%)
cmd/compile/internal/ssa.(*debugState).processValue 9756 -> 9724 (-0.33%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%)
cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%)
cmd/compile/internal/ssa [cmd/compile]
cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%)
cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%)
cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%)
file before after Δ %
math/bits.s 2352 2354 +2 +0.085%
math/bits [cmd/compile].s 2352 2354 +2 +0.085%
math.s 35675 35674 -1 -0.003%
math [cmd/compile].s 35675 35674 -1 -0.003%
runtime.s 577251 577245 -6 -0.001%
runtime [cmd/compile].s 642419 642438 +19 +0.003%
sort.s 37434 37435 +1 +0.003%
strconv.s 48391 48343 -48 -0.099%
sort [cmd/compile].s 37434 37435 +1 +0.003%
bufio.s 21386 21418 +32 +0.150%
strconv [cmd/compile].s 48391 48343 -48 -0.099%
image.s 34978 35022 +44 +0.126%
regexp/syntax.s 81719 81781 +62 +0.076%
time.s 94341 94184 -157 -0.166%
regexp.s 60411 60399 -12 -0.020%
bufio [cmd/compile].s 21512 21544 +32 +0.149%
encoding/binary.s 34062 34087 +25 +0.073%
regexp/syntax [cmd/compile].s 81719 81781 +62 +0.076%
encoding/base64.s 11907 11903 -4 -0.034%
time [cmd/compile].s 94341 94184 -157 -0.166%
index/suffixarray.s 41633 41527 -106 -0.255%
os.s 101770 101738 -32 -0.031%
regexp [cmd/compile].s 60411 60399 -12 -0.020%
encoding/binary [cmd/compile].s 37173 37198 +25 +0.067%
encoding/base64 [cmd/compile].s 11907 11903 -4 -0.034%
os/exec.s 23900 23907 +7 +0.029%
encoding/hex.s 6038 6030 -8 -0.132%
crypto/des.s 5073 5056 -17 -0.335%
os [cmd/compile].s 102030 101998 -32 -0.031%
vendor/golang.org/x/net/http2/hpack.s 22027 22033 +6 +0.027%
math/big.s 164808 164753 -55 -0.033%
cmd/vendor/golang.org/x/sys/unix.s 121450 121444 -6 -0.005%
encoding/json.s 110294 110287 -7 -0.006%
testing.s 115303 115281 -22 -0.019%
archive/zip.s 65329 65325 -4 -0.006%
os/user.s 10078 10080 +2 +0.020%
encoding/gob.s 143788 143783 -5 -0.003%
crypto/elliptic.s 30686 30704 +18 +0.059%
go/doc/comment.s 49401 49433 +32 +0.065%
debug/buildinfo.s 9095 9085 -10 -0.110%
image/png.s 36113 36081 -32 -0.089%
archive/tar.s 71994 71897 -97 -0.135%
crypto/internal/cryptotest.s 60872 60849 -23 -0.038%
internal/pkgbits.s 20441 20429 -12 -0.059%
testing/quick.s 8236 8235 -1 -0.012%
log/slog.s 77568 77558 -10 -0.013%
internal/trace/internal/oldtrace.s 52885 52896 +11 +0.021%
runtime/pprof.s 123978 123969 -9 -0.007%
internal/coverage/cfile.s 25198 25184 -14 -0.056%
cmd/internal/objabi.s 19954 19946 -8 -0.040%
crypto/ecdsa.s 29159 29141 -18 -0.062%
log/slog/internal/benchmarks.s 6694 6695 +1 +0.015%
net.s 299569 299503 -66 -0.022%
os/exec [cmd/compile].s 23888 23895 +7 +0.029%
internal/trace.s 179226 179240 +14 +0.008%
internal/fuzz.s 86190 86191 +1 +0.001%
crypto/x509.s 177195 177164 -31 -0.017%
cmd/internal/obj/s390x.s 121642 121610 -32 -0.026%
cmd/internal/obj/ppc64.s 140118 140122 +4 +0.003%
encoding/hex [cmd/compile].s 6149 6141 -8 -0.130%
cmd/internal/objabi [cmd/compile].s 19954 19946 -8 -0.040%
cmd/internal/obj/arm64.s 158523 158555 +32 +0.020%
go/doc/comment [cmd/compile].s 49512 49544 +32 +0.065%
math/big [cmd/compile].s 166394 166339 -55 -0.033%
encoding/json [cmd/compile].s 110712 110705 -7 -0.006%
cmd/covdata.s 39699 39687 -12 -0.030%
runtime/pprof [cmd/compile].s 125209 125200 -9 -0.007%
cmd/compile/internal/syntax.s 181755 181736 -19 -0.010%
cmd/dist.s 177893 177861 -32 -0.018%
crypto/tls.s 389157 389113 -44 -0.011%
internal/pkgbits [cmd/compile].s 41644 41632 -12 -0.029%
cmd/compile/internal/syntax [cmd/compile].s 196105 196086 -19 -0.010%
cmd/compile/internal/types.s 71315 71345 +30 +0.042%
cmd/internal/obj/s390x [cmd/compile].s 121733 121701 -32 -0.026%
cmd/go/internal/trace.s 4796 4760 -36 -0.751%
cmd/internal/obj/arm64 [cmd/compile].s 168120 168147 +27 +0.016%
cmd/internal/obj/ppc64 [cmd/compile].s 140219 140223 +4 +0.003%
cmd/internal/script.s 83442 83436 -6 -0.007%
cmd/link/internal/loader.s 93299 93294 -5 -0.005%
net/http.s 620639 620472 -167 -0.027%
net/http/pprof.s 35016 35013 -3 -0.009%
cmd/compile/internal/coverage.s 6668 6667 -1 -0.015%
cmd/vendor/golang.org/x/telemetry/internal/upload.s 34210 34148 -62 -0.181%
cmd/compile/internal/coverage [cmd/compile].s 6664 6663 -1 -0.015%
cmd/vendor/golang.org/x/text/language.s 48077 48074 -3 -0.006%
cmd/go/internal/vcweb.s 45193 45189 -4 -0.009%
cmd/go/internal/vcs.s 44749 44729 -20 -0.045%
cmd/compile/internal/inline/inlheur.s 83758 83742 -16 -0.019%
cmd/compile/internal/inline/inlheur [cmd/compile].s 84773 84757 -16 -0.019%
cmd/go/internal/modfetch/codehost.s 89098 89094 -4 -0.004%
cmd/trace.s 257550 257564 +14 +0.005%
cmd/link/internal/ld.s 641945 641706 -239 -0.037%
cmd/link/internal/arm64.s 34805 34798 -7 -0.020%
cmd/go/internal/modload.s 328971 328954 -17 -0.005%
cmd/go/internal/load.s 178877 178871 -6 -0.003%
cmd/go/internal/clean.s 11006 10990 -16 -0.145%
cmd/compile/internal/ssa.s 3552843 3553347 +504 +0.014%
cmd/compile/internal/ssa [cmd/compile].s 3752511 3753123 +612 +0.016%
total 36179015 36178687 -328 -0.001%
Change-Id: I251c2898ccf3c9931d162d87dabbd49cf4ec73a5
Reviewed-on: https://go-review.googlesource.com/c/go/+/641757
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
+(BT(Q|L)const [c] (ADDQ x x)) && c>1 => (BT(Q|L)const [c-1] x)
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
+(BTLconst [c] (ADDL x x)) && c>1 => (BTLconst [c-1] x)
(BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
// We thus special-case them, by detecting the shift patterns.
// Special case resetting first/last bit
-(SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
+(ADD(L|Q) (SHR(L|Q)const [1] x) (SHR(L|Q)const [1] x))
=> (AND(L|Q)const [-2] x)
-(SHRLconst [1] (SHLLconst [1] x))
+(SHRLconst [1] (ADDL x x))
=> (ANDLconst [0x7fffffff] x)
-(SHRQconst [1] (SHLQconst [1] x))
+(SHRQconst [1] (ADDQ x x))
=> (BTRQconst [63] x)
// Special case testing first/last bit (with double-shift generated by generic.rules)
(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) => (SHL(Q|L)const [int8(log32(c/5))] (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) => (SHL(Q|L)const [int8(log32(c/9))] (LEA(Q|L)8 <v.Type> x x))
+// Prefer addition when shifting left by one
+(SHL(Q|L)const [1] x) => (ADD(Q|L) x x)
+
// combine add/shift into LEAQ/LEAL
(ADD(L|Q) x (SHL(L|Q)const [3] y)) => (LEA(L|Q)8 x y)
(ADD(L|Q) x (SHL(L|Q)const [2] y)) => (LEA(L|Q)4 x y)
-(ADD(L|Q) x (SHL(L|Q)const [1] y)) => (LEA(L|Q)2 x y)
(ADD(L|Q) x (ADD(L|Q) y y)) => (LEA(L|Q)2 x y)
(ADD(L|Q) x (ADD(L|Q) x y)) => (LEA(L|Q)2 y x)
// combine ADDQ/ADDQconst into LEAQ1/LEAL1
(ADD(Q|L)const [c] (ADD(Q|L) x y)) => (LEA(Q|L)1 [c] x y)
(ADD(Q|L) (ADD(Q|L)const [c] x) y) => (LEA(Q|L)1 [c] x y)
-(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) => (LEA(Q|L)1 [c] x x)
+(ADD(Q|L)const [c] (ADD(Q|L) x x)) => (LEA(Q|L)1 [c] x x)
// fold ADDQ/ADDL into LEAQ/LEAL
(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x)
(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEA(Q|L)8 [c+8*d] {s} x y)
// fold shifts into LEAQx/LEALx
-(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)2 [c] {s} x y)
+(LEA(Q|L)1 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)2 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) => (LEA(Q|L)8 [c] {s} x y)
-(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)4 [c] {s} x y)
+(LEA(Q|L)2 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)8 [c] {s} x y)
-(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)8 [c] {s} x y)
+(LEA(Q|L)4 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)8 [c] {s} x y)
+
+// (x + x) << 1 -> x << 2
+(LEA(Q|L)2 [0] {s} (ADD(Q|L) x x) x) && s == nil => (SHL(Q|L)const [2] x)
+
+// (x + x) << 2 -> x << 3 and similar
+(SHL(Q|L)const [c] (ADD(Q|L) x x)) => (SHL(Q|L)const [c+1] x)
// reverse ordering of compare instruction
(SETL (InvertFlags x)) => (SETG x)
func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
+ // match: (ADDL (SHRLconst [1] x) (SHRLconst [1] x))
+ // result: (ANDLconst [-2] x)
+ for {
+ if v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
+ break
+ }
+ x := v_0.Args[0]
+ if v_1.Op != OpAMD64SHRLconst || auxIntToInt8(v_1.AuxInt) != 1 || x != v_1.Args[0] {
+ break
+ }
+ v.reset(OpAMD64ANDLconst)
+ v.AuxInt = int32ToAuxInt(-2)
+ v.AddArg(x)
+ return true
+ }
// match: (ADDL x (MOVLconst [c]))
// result: (ADDLconst [c] x)
for {
}
break
}
- // match: (ADDL x (SHLLconst [1] y))
- // result: (LEAL2 x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
- continue
- }
- y := v_1.Args[0]
- v.reset(OpAMD64LEAL2)
- v.AddArg2(x, y)
- return true
- }
- break
- }
// match: (ADDL x (ADDL y y))
// result: (LEAL2 x y)
for {
v.AddArg2(x, y)
return true
}
- // match: (ADDLconst [c] (SHLLconst [1] x))
+ // match: (ADDLconst [c] (ADDL x x))
// result: (LEAL1 [c] x x)
for {
c := auxIntToInt32(v.AuxInt)
- if v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
+ if v_0.Op != OpAMD64ADDL {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] {
break
}
- x := v_0.Args[0]
v.reset(OpAMD64LEAL1)
v.AuxInt = int32ToAuxInt(c)
v.AddArg2(x, x)
func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
+ // match: (ADDQ (SHRQconst [1] x) (SHRQconst [1] x))
+ // result: (ANDQconst [-2] x)
+ for {
+ if v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
+ break
+ }
+ x := v_0.Args[0]
+ if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 1 || x != v_1.Args[0] {
+ break
+ }
+ v.reset(OpAMD64ANDQconst)
+ v.AuxInt = int32ToAuxInt(-2)
+ v.AddArg(x)
+ return true
+ }
// match: (ADDQ x (MOVQconst <t> [c]))
// cond: is32Bit(c) && !t.IsPtr()
// result: (ADDQconst [int32(c)] x)
}
break
}
- // match: (ADDQ x (SHLQconst [1] y))
- // result: (LEAQ2 x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
- continue
- }
- y := v_1.Args[0]
- v.reset(OpAMD64LEAQ2)
- v.AddArg2(x, y)
- return true
- }
- break
- }
// match: (ADDQ x (ADDQ y y))
// result: (LEAQ2 x y)
for {
v.AddArg2(x, y)
return true
}
- // match: (ADDQconst [c] (SHLQconst [1] x))
+ // match: (ADDQconst [c] (ADDQ x x))
// result: (LEAQ1 [c] x x)
for {
c := auxIntToInt32(v.AuxInt)
- if v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 {
+ if v_0.Op != OpAMD64ADDQ {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] {
break
}
- x := v_0.Args[0]
v.reset(OpAMD64LEAQ1)
v.AuxInt = int32ToAuxInt(c)
v.AddArg2(x, x)
v.AddArg(x)
return true
}
+ // match: (BTLconst [c] (ADDQ x x))
+ // cond: c>1
+ // result: (BTLconst [c-1] x)
+ for {
+ c := auxIntToInt8(v.AuxInt)
+ if v_0.Op != OpAMD64ADDQ {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || !(c > 1) {
+ break
+ }
+ v.reset(OpAMD64BTLconst)
+ v.AuxInt = int8ToAuxInt(c - 1)
+ v.AddArg(x)
+ return true
+ }
// match: (BTLconst [c] (SHLQconst [d] x))
// cond: c>d
// result: (BTLconst [c-d] x)
v.AddArg(x)
return true
}
+ // match: (BTLconst [c] (ADDL x x))
+ // cond: c>1
+ // result: (BTLconst [c-1] x)
+ for {
+ c := auxIntToInt8(v.AuxInt)
+ if v_0.Op != OpAMD64ADDL {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || !(c > 1) {
+ break
+ }
+ v.reset(OpAMD64BTLconst)
+ v.AuxInt = int8ToAuxInt(c - 1)
+ v.AddArg(x)
+ return true
+ }
// match: (BTLconst [c] (SHLLconst [d] x))
// cond: c>d
// result: (BTLconst [c-d] x)
v.AddArg(x)
return true
}
+ // match: (BTQconst [c] (ADDQ x x))
+ // cond: c>1
+ // result: (BTQconst [c-1] x)
+ for {
+ c := auxIntToInt8(v.AuxInt)
+ if v_0.Op != OpAMD64ADDQ {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || !(c > 1) {
+ break
+ }
+ v.reset(OpAMD64BTQconst)
+ v.AuxInt = int8ToAuxInt(c - 1)
+ v.AddArg(x)
+ return true
+ }
// match: (BTQconst [c] (SHLQconst [d] x))
// cond: c>d
// result: (BTQconst [c-d] x)
}
break
}
- // match: (LEAL1 [c] {s} x (SHLLconst [1] y))
+ // match: (LEAL1 [c] {s} x (ADDL y y))
// result: (LEAL2 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
- if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
+ if v_1.Op != OpAMD64ADDL {
+ continue
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
continue
}
- y := v_1.Args[0]
v.reset(OpAMD64LEAL2)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
v.AddArg2(x, y)
return true
}
- // match: (LEAL2 [c] {s} x (SHLLconst [1] y))
+ // match: (LEAL2 [c] {s} x (ADDL y y))
// result: (LEAL4 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
x := v_0
- if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
+ if v_1.Op != OpAMD64ADDL {
+ break
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
break
}
- y := v_1.Args[0]
v.reset(OpAMD64LEAL4)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
v.AddArg2(x, y)
return true
}
+ // match: (LEAL2 [0] {s} (ADDL x x) x)
+ // cond: s == nil
+ // result: (SHLLconst [2] x)
+ for {
+ if auxIntToInt32(v.AuxInt) != 0 {
+ break
+ }
+ s := auxToSym(v.Aux)
+ if v_0.Op != OpAMD64ADDL {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || x != v_1 || !(s == nil) {
+ break
+ }
+ v.reset(OpAMD64SHLLconst)
+ v.AuxInt = int8ToAuxInt(2)
+ v.AddArg(x)
+ return true
+ }
return false
}
func rewriteValueAMD64_OpAMD64LEAL4(v *Value) bool {
v.AddArg2(x, y)
return true
}
- // match: (LEAL4 [c] {s} x (SHLLconst [1] y))
+ // match: (LEAL4 [c] {s} x (ADDL y y))
// result: (LEAL8 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
x := v_0
- if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
+ if v_1.Op != OpAMD64ADDL {
+ break
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
break
}
- y := v_1.Args[0]
v.reset(OpAMD64LEAL8)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
}
break
}
- // match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
+ // match: (LEAQ1 [c] {s} x (ADDQ y y))
// result: (LEAQ2 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
- if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
+ if v_1.Op != OpAMD64ADDQ {
+ continue
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
continue
}
- y := v_1.Args[0]
v.reset(OpAMD64LEAQ2)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
v.AddArg2(x, y)
return true
}
- // match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
+ // match: (LEAQ2 [c] {s} x (ADDQ y y))
// result: (LEAQ4 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
x := v_0
- if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
+ if v_1.Op != OpAMD64ADDQ {
+ break
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
break
}
- y := v_1.Args[0]
v.reset(OpAMD64LEAQ4)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
v.AddArg2(x, y)
return true
}
+ // match: (LEAQ2 [0] {s} (ADDQ x x) x)
+ // cond: s == nil
+ // result: (SHLQconst [2] x)
+ for {
+ if auxIntToInt32(v.AuxInt) != 0 {
+ break
+ }
+ s := auxToSym(v.Aux)
+ if v_0.Op != OpAMD64ADDQ {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || x != v_1 || !(s == nil) {
+ break
+ }
+ v.reset(OpAMD64SHLQconst)
+ v.AuxInt = int8ToAuxInt(2)
+ v.AddArg(x)
+ return true
+ }
// match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB
// result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
v.AddArg2(x, y)
return true
}
- // match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
+ // match: (LEAQ4 [c] {s} x (ADDQ y y))
// result: (LEAQ8 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
x := v_0
- if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
+ if v_1.Op != OpAMD64ADDQ {
+ break
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
break
}
- y := v_1.Args[0]
v.reset(OpAMD64LEAQ8)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
}
func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
v_0 := v.Args[0]
- // match: (SHLLconst [1] (SHRLconst [1] x))
- // result: (ANDLconst [-2] x)
+ // match: (SHLLconst x [0])
+ // result: x
for {
- if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
+ if auxIntToInt8(v.AuxInt) != 0 {
break
}
- x := v_0.Args[0]
- v.reset(OpAMD64ANDLconst)
- v.AuxInt = int32ToAuxInt(-2)
- v.AddArg(x)
+ x := v_0
+ v.copyOf(x)
return true
}
- // match: (SHLLconst x [0])
- // result: x
+ // match: (SHLLconst [1] x)
+ // result: (ADDL x x)
for {
- if auxIntToInt8(v.AuxInt) != 0 {
+ if auxIntToInt8(v.AuxInt) != 1 {
break
}
x := v_0
- v.copyOf(x)
+ v.reset(OpAMD64ADDL)
+ v.AddArg2(x, x)
+ return true
+ }
+ // match: (SHLLconst [c] (ADDL x x))
+ // result: (SHLLconst [c+1] x)
+ for {
+ c := auxIntToInt8(v.AuxInt)
+ if v_0.Op != OpAMD64ADDL {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] {
+ break
+ }
+ v.reset(OpAMD64SHLLconst)
+ v.AuxInt = int8ToAuxInt(c + 1)
+ v.AddArg(x)
return true
}
// match: (SHLLconst [d] (MOVLconst [c]))
}
func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
v_0 := v.Args[0]
- // match: (SHLQconst [1] (SHRQconst [1] x))
- // result: (ANDQconst [-2] x)
+ // match: (SHLQconst x [0])
+ // result: x
for {
- if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
+ if auxIntToInt8(v.AuxInt) != 0 {
break
}
- x := v_0.Args[0]
- v.reset(OpAMD64ANDQconst)
- v.AuxInt = int32ToAuxInt(-2)
- v.AddArg(x)
+ x := v_0
+ v.copyOf(x)
return true
}
- // match: (SHLQconst x [0])
- // result: x
+ // match: (SHLQconst [1] x)
+ // result: (ADDQ x x)
for {
- if auxIntToInt8(v.AuxInt) != 0 {
+ if auxIntToInt8(v.AuxInt) != 1 {
break
}
x := v_0
- v.copyOf(x)
+ v.reset(OpAMD64ADDQ)
+ v.AddArg2(x, x)
+ return true
+ }
+ // match: (SHLQconst [c] (ADDQ x x))
+ // result: (SHLQconst [c+1] x)
+ for {
+ c := auxIntToInt8(v.AuxInt)
+ if v_0.Op != OpAMD64ADDQ {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] {
+ break
+ }
+ v.reset(OpAMD64SHLQconst)
+ v.AuxInt = int8ToAuxInt(c + 1)
+ v.AddArg(x)
return true
}
// match: (SHLQconst [d] (MOVQconst [c]))
}
func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
v_0 := v.Args[0]
- // match: (SHRLconst [1] (SHLLconst [1] x))
+ // match: (SHRLconst [1] (ADDL x x))
// result: (ANDLconst [0x7fffffff] x)
for {
- if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
+ if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDL {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] {
break
}
- x := v_0.Args[0]
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(0x7fffffff)
v.AddArg(x)
}
func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool {
v_0 := v.Args[0]
- // match: (SHRQconst [1] (SHLQconst [1] x))
+ // match: (SHRQconst [1] (ADDQ x x))
// result: (BTRQconst [63] x)
for {
- if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 {
+ if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDQ {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] {
break
}
- x := v_0.Args[0]
v.reset(OpAMD64BTRQconst)
v.AuxInt = int8ToAuxInt(63)
v.AddArg(x)
return a, b
}
+func Mul_2(n1 int32, n2 int64) (int32, int64) {
+ // amd64:"ADDL", -"SHLL"
+ a := n1 * 2
+ // amd64:"ADDQ", -"SHLQ"
+ b := n2 * 2
+
+ return a, b
+}
+
func Mul_96(n int) int {
// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
return n
}
+func clearLastBit(x int64, y int32) (int64, int32) {
+ // amd64:"ANDQ\t[$]-2"
+ a := (x >> 1) << 1
+
+ // amd64:"ANDL\t[$]-2"
+ b := (y >> 1) << 1
+
+ return a, b
+}
+
func bitcompl64(a, b uint64) (n uint64) {
// amd64:"BTCQ"
n += b ^ (1 << (a & 63))
return int64(v) >> 8
}
+func lshConst32x1(v int32) int32 {
+ // amd64:"ADDL", -"SHLL"
+ return v << 1
+}
+
+func lshConst64x1(v int64) int64 {
+ // amd64:"ADDQ", -"SHLQ"
+ return v << 1
+}
+
func lshConst32x64(v int32) int32 {
// ppc64x:"SLW"
// riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW"
return v >> uint32(33)
}
+func lshConst32x1Add(x int32) int32 {
+ // amd64:"SHLL\t[$]2"
+ return (x + x) << 1
+}
+
+func lshConst64x1Add(x int64) int64 {
+ // amd64:"SHLQ\t[$]2"
+ return (x + x) << 1
+}
+
+func lshConst32x2Add(x int32) int32 {
+ // amd64:"SHLL\t[$]3"
+ return (x + x) << 2
+}
+
+func lshConst64x2Add(x int64) int64 {
+ // amd64:"SHLQ\t[$]3"
+ return (x + x) << 2
+}
+
// ------------------ //
// masked shifts //
// ------------------ //