From: Jakub Ciolek Date: Sat, 11 Jan 2025 18:26:57 +0000 (+0100) Subject: cmd/compile: prefer an add when shifting left by 1 X-Git-Tag: go1.25rc1~1154 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=cd595be6d669af171bc28bdc939cc36785717718;p=gostls13.git cmd/compile: prefer an add when shifting left by 1 ADD(Q|L) has generally twice the throughput. Came up in CL 626998. Throughput by arch: Zen 4: SHLL (R64, 1): 0.5 ADD (R64, R64): 0.25 Intel Alder Lake: SHLL (R64, 1): 0.5 ADD (R64, R64): 0.2 Intel Haswell: SHLL (R64, 1): 0.5 ADD (R64, R64): 0.25 Also include a minor opt for: (x + x) << c -> x << (c + 1) Before this, the code: func addShift(x int64) int64 { return (x + x) << 1 } emitted two instructions: ADDQ AX, AX SHLQ $1, AX but we can do it in a single shift: SHLQ $2, AX Add a codegen test for clearing the last bit. compilecmp linux/amd64: math math.sqrt 243 -> 242 (-0.41%) math [cmd/compile] math.sqrt 243 -> 242 (-0.41%) runtime runtime.selectgo 5455 -> 5445 (-0.18%) runtime.sysargs 665 -> 662 (-0.45%) runtime.isPinned 145 -> 141 (-2.76%) runtime.atoi64 198 -> 194 (-2.02%) runtime.setPinned 714 -> 709 (-0.70%) runtime [cmd/compile] runtime.sysargs 665 -> 662 (-0.45%) runtime.setPinned 714 -> 709 (-0.70%) runtime.atoi64 198 -> 194 (-2.02%) runtime.isPinned 145 -> 141 (-2.76%) strconv strconv.computeBounds 109 -> 107 (-1.83%) strconv.FormatInt 201 -> 197 (-1.99%) strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%) strconv.small 144 -> 134 (-6.94%) strconv.AppendInt 357 -> 344 (-3.64%) strconv.ryuDigits32 490 -> 488 (-0.41%) strconv.AppendUint 342 -> 340 (-0.58%) strconv [cmd/compile] strconv.FormatInt 201 -> 197 (-1.99%) strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%) strconv.ryuDigits32 490 -> 488 (-0.41%) strconv.AppendUint 342 -> 340 (-0.58%) strconv.computeBounds 109 -> 107 (-1.83%) strconv.small 144 -> 134 (-6.94%) strconv.AppendInt 357 -> 344 (-3.64%) image image.Rectangle.Inset 101 -> 97 (-3.96%) regexp/syntax regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%) regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%) regexp/syntax.ranges.Less 153 -> 150 (-1.96%) regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%) time time.Time.Before 179 -> 161 (-10.06%) time.Time.Compare 189 -> 166 (-12.17%) time.Time.Sub 444 -> 425 (-4.28%) time.Time.UnixMicro 106 -> 95 (-10.38%) time.div 592 -> 587 (-0.84%) time.Time.UnixNano 85 -> 78 (-8.24%) time.(*Time).UnixMilli 141 -> 140 (-0.71%) time.Time.UnixMilli 106 -> 95 (-10.38%) time.(*Time).UnixMicro 141 -> 140 (-0.71%) time.Time.After 179 -> 161 (-10.06%) time.Time.Equal 170 -> 150 (-11.76%) time.Time.AppendBinary 766 -> 757 (-1.17%) time.Time.IsZero 74 -> 66 (-10.81%) time.(*Time).UnixNano 124 -> 113 (-8.87%) time.(*Time).IsZero 113 -> 108 (-4.42%) regexp regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%) regexp.QuoteMeta 485 -> 469 (-3.30%) regexp/syntax [cmd/compile] regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%) regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%) regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%) regexp/syntax.ranges.Less 153 -> 150 (-1.96%) encoding/base64 encoding/base64.decodedLen 92 -> 90 (-2.17%) encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%) time [cmd/compile] time.(*Time).IsZero 113 -> 108 (-4.42%) time.Time.IsZero 74 -> 66 (-10.81%) time.(*Time).UnixNano 124 -> 113 (-8.87%) time.Time.UnixMilli 106 -> 95 (-10.38%) time.Time.Equal 170 -> 150 (-11.76%) time.Time.UnixMicro 106 -> 95 (-10.38%) time.(*Time).UnixMicro 141 -> 140 (-0.71%) time.Time.Before 179 -> 161 (-10.06%) time.Time.UnixNano 85 -> 78 (-8.24%) time.Time.AppendBinary 766 -> 757 (-1.17%) time.div 592 -> 587 (-0.84%) time.Time.After 179 -> 161 (-10.06%) time.Time.Compare 189 -> 166 (-12.17%) time.(*Time).UnixMilli 141 -> 140 (-0.71%) time.Time.Sub 444 -> 425 (-4.28%) index/suffixarray index/suffixarray.sais_8_32 1677 -> 1645 (-1.91%) index/suffixarray.sais_32 1677 -> 1645 (-1.91%) index/suffixarray.sais_64 1677 -> 1654 (-1.37%) index/suffixarray.sais_8_64 1677 -> 1654 (-1.37%) index/suffixarray.writeInt 249 -> 247 (-0.80%) os os.Expand 1070 -> 1051 (-1.78%) os.Chtimes 787 -> 774 (-1.65%) regexp [cmd/compile] regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%) regexp.QuoteMeta 485 -> 469 (-3.30%) encoding/base64 [cmd/compile] encoding/base64.decodedLen 92 -> 90 (-2.17%) encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%) encoding/hex encoding/hex.Encode 138 -> 136 (-1.45%) encoding/hex.(*decoder).Read 830 -> 824 (-0.72%) crypto/des crypto/des.initFeistelBox 235 -> 229 (-2.55%) crypto/des.cryptBlock 549 -> 538 (-2.00%) os [cmd/compile] os.Chtimes 787 -> 774 (-1.65%) os.Expand 1070 -> 1051 (-1.78%) math/big math/big.newFloat 238 -> 223 (-6.30%) math/big.nat.mul 2138 -> 2122 (-0.75%) math/big.karatsubaSqr 1372 -> 1369 (-0.22%) math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%) math/big.basicSqr 1032 -> 1017 (-1.45%) cmd/vendor/golang.org/x/sys/unix cmd/vendor/golang.org/x/sys/unix.TimeToTimespec 72 -> 66 (-8.33%) encoding/json encoding/json.Indent 404 -> 403 (-0.25%) encoding/json.MarshalIndent 303 -> 297 (-1.98%) testing testing.(*T).Deadline 84 -> 82 (-2.38%) testing.(*M).Run 3545 -> 3525 (-0.56%) archive/zip archive/zip.headerFileInfo.ModTime 229 -> 223 (-2.62%) encoding/gob encoding/gob.(*encoderState).encodeInt 474 -> 469 (-1.05%) crypto/elliptic crypto/elliptic.Marshal 728 -> 714 (-1.92%) debug/buildinfo debug/buildinfo.readString 325 -> 315 (-3.08%) image/png image/png.(*decoder).readImagePass 10866 -> 10834 (-0.29%) archive/tar archive/tar.Header.allowedFormats.func3 1768 -> 1736 (-1.81%) archive/tar.formatPAXTime 389 -> 358 (-7.97%) archive/tar.(*Writer).writeGNUHeader 741 -> 727 (-1.89%) archive/tar.readGNUSparseMap0x1 709 -> 695 (-1.97%) archive/tar.(*Writer).templateV7Plus 915 -> 909 (-0.66%) crypto/internal/cryptotest crypto/internal/cryptotest.TestHash.func4 890 -> 879 (-1.24%) crypto/internal/cryptotest.TestStream.func6.1 646 -> 645 (-0.15%) crypto/internal/cryptotest.testCipher.func3 1300 -> 1289 (-0.85%) internal/pkgbits internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%) internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%) testing/quick testing/quick.(*Config).getRand 316 -> 315 (-0.32%) log/slog log/slog.TimeValue 489 -> 479 (-2.04%) runtime/pprof runtime/pprof.(*profileBuilder).build 2341 -> 2322 (-0.81%) internal/coverage/cfile internal/coverage/cfile.(*emitState).openMetaFile 824 -> 822 (-0.24%) internal/coverage/cfile.(*emitState).openCounterFile 904 -> 892 (-1.33%) cmd/internal/objabi cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%) crypto/ecdsa crypto/ecdsa.pointFromAffine 1162 -> 1144 (-1.55%) net net.minNonzeroTime 313 -> 308 (-1.60%) net.cgoLookupAddrPTR 812 -> 797 (-1.85%) net.(*IPNet).String 851 -> 827 (-2.82%) net.IP.AppendText 488 -> 471 (-3.48%) net.IPMask.String 281 -> 270 (-3.91%) net.partialDeadline 374 -> 366 (-2.14%) net.hexString 249 -> 240 (-3.61%) net.IP.String 454 -> 453 (-0.22%) internal/fuzz internal/fuzz.newPcgRand 240 -> 234 (-2.50%) crypto/x509 crypto/x509.(*Certificate).isValid 2642 -> 2611 (-1.17%) cmd/internal/obj/s390x cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%) encoding/hex [cmd/compile] encoding/hex.(*decoder).Read 830 -> 824 (-0.72%) encoding/hex.Encode 138 -> 136 (-1.45%) cmd/internal/objabi [cmd/compile] cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%) math/big [cmd/compile] math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%) math/big.nat.mul 2138 -> 2122 (-0.75%) math/big.karatsubaSqr 1372 -> 1369 (-0.22%) math/big.basicSqr 1032 -> 1017 (-1.45%) math/big.newFloat 238 -> 223 (-6.30%) encoding/json [cmd/compile] encoding/json.MarshalIndent 303 -> 297 (-1.98%) encoding/json.Indent 404 -> 403 (-0.25%) cmd/covdata main.(*metaMerge).emitCounters 985 -> 973 (-1.22%) runtime/pprof [cmd/compile] runtime/pprof.(*profileBuilder).build 2341 -> 2322 (-0.81%) cmd/compile/internal/syntax cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%) cmd/dist main.runInstall 19081 -> 19049 (-0.17%) crypto/tls crypto/tls.extractPadding 176 -> 175 (-0.57%) slices.Clone[[]crypto/tls.SignatureScheme,crypto/tls.SignatureScheme] 253 -> 247 (-2.37%) slices.Clone[[]uint16,uint16] 253 -> 247 (-2.37%) slices.Clone[[]crypto/tls.CurveID,crypto/tls.CurveID] 253 -> 247 (-2.37%) crypto/tls.(*Config).cipherSuites 335 -> 326 (-2.69%) slices.DeleteFunc[go.shape.[]crypto/tls.CurveID,go.shape.uint16] 437 -> 434 (-0.69%) crypto/tls.dial 1349 -> 1339 (-0.74%) slices.DeleteFunc[go.shape.[]uint16,go.shape.uint16] 437 -> 434 (-0.69%) internal/pkgbits [cmd/compile] internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%) internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%) cmd/compile/internal/syntax [cmd/compile] cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%) cmd/internal/obj/s390x [cmd/compile] cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%) cmd/go/internal/trace cmd/go/internal/trace.Flow 910 -> 886 (-2.64%) cmd/go/internal/trace.(*Span).Done 311 -> 304 (-2.25%) cmd/go/internal/trace.StartSpan 620 -> 615 (-0.81%) cmd/internal/script cmd/internal/script.(*Engine).Execute.func2 534 -> 528 (-1.12%) cmd/link/internal/loader cmd/link/internal/loader.(*Loader).SetSymSect 344 -> 338 (-1.74%) net/http net/http.(*Transport).queueForIdleConn 1797 -> 1766 (-1.73%) net/http.(*Transport).getConn 2149 -> 2131 (-0.84%) net/http.(*http2ClientConn).tooIdleLocked 207 -> 197 (-4.83%) net/http.(*http2responseWriter).SetWriteDeadline.func1 520 -> 508 (-2.31%) net/http.(*Cookie).Valid 837 -> 818 (-2.27%) net/http.(*http2responseWriter).SetReadDeadline 373 -> 357 (-4.29%) net/http.checkIfRange 701 -> 690 (-1.57%) net/http.(*http2SettingsFrame).Value 325 -> 298 (-8.31%) net/http.(*http2SettingsFrame).HasDuplicates 777 -> 767 (-1.29%) net/http.(*Server).Serve 1746 -> 1739 (-0.40%) net/http.http2traceGotConn 569 -> 556 (-2.28%) net/http/pprof net/http/pprof.collectProfile 242 -> 239 (-1.24%) cmd/compile/internal/coverage cmd/compile/internal/coverage.metaHashAndLen 439 -> 438 (-0.23%) cmd/vendor/golang.org/x/telemetry/internal/upload cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).findWork 4570 -> 4540 (-0.66%) cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).reports 3604 -> 3572 (-0.89%) cmd/compile/internal/coverage [cmd/compile] cmd/compile/internal/coverage.metaHashAndLen 439 -> 438 (-0.23%) cmd/vendor/golang.org/x/text/language cmd/vendor/golang.org/x/text/language.regionGroupDist 287 -> 284 (-1.05%) cmd/go/internal/vcweb cmd/go/internal/vcweb.(*Server).overview.func1 1045 -> 1041 (-0.38%) cmd/go/internal/vcs cmd/go/internal/vcs.expand 761 -> 741 (-2.63%) cmd/compile/internal/inline/inlheur slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%) cmd/compile/internal/inline/inlheur [cmd/compile] slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%) cmd/go/internal/modfetch/codehost cmd/go/internal/modfetch/codehost.bzrParseStat 2217 -> 2213 (-0.18%) cmd/link/internal/ld cmd/link/internal/ld.decodetypeStructFieldCount 157 -> 152 (-3.18%) cmd/link/internal/ld.(*Link).address 12559 -> 12495 (-0.51%) cmd/link/internal/ld.(*dodataState).allocateDataSections 18345 -> 18205 (-0.76%) cmd/link/internal/ld.elfshreloc 618 -> 616 (-0.32%) cmd/link/internal/ld.(*deadcodePass).decodetypeMethods 794 -> 779 (-1.89%) cmd/link/internal/ld.(*dodataState).assignDsymsToSection 668 -> 663 (-0.75%) cmd/link/internal/ld.relocSectFn 285 -> 284 (-0.35%) cmd/link/internal/ld.decodetypeIfaceMethodCount 146 -> 144 (-1.37%) cmd/link/internal/ld.decodetypeArrayLen 157 -> 152 (-3.18%) cmd/link/internal/arm64 cmd/link/internal/arm64.gensymlate.func1 895 -> 888 (-0.78%) cmd/go/internal/modload cmd/go/internal/modload.queryProxy.func3 1029 -> 1012 (-1.65%) cmd/go/internal/load cmd/go/internal/load.(*Package).setBuildInfo 8453 -> 8447 (-0.07%) cmd/go/internal/clean cmd/go/internal/clean.runClean 2120 -> 2104 (-0.75%) cmd/compile/internal/ssa cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%) cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%) cmd/compile/internal/ssa.(*debugState).buildLocationLists 3326 -> 3294 (-0.96%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%) cmd/compile/internal/ssa.(*debugState).processValue 9756 -> 9724 (-0.33%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%) cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%) cmd/compile/internal/ssa [cmd/compile] cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%) cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%) cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%) file before after Δ % math/bits.s 2352 2354 +2 +0.085% math/bits [cmd/compile].s 2352 2354 +2 +0.085% math.s 35675 35674 -1 -0.003% math [cmd/compile].s 35675 35674 -1 -0.003% runtime.s 577251 577245 -6 -0.001% runtime [cmd/compile].s 642419 642438 +19 +0.003% sort.s 37434 37435 +1 +0.003% strconv.s 48391 48343 -48 -0.099% sort [cmd/compile].s 37434 37435 +1 +0.003% bufio.s 21386 21418 +32 +0.150% strconv [cmd/compile].s 48391 48343 -48 -0.099% image.s 34978 35022 +44 +0.126% regexp/syntax.s 81719 81781 +62 +0.076% time.s 94341 94184 -157 -0.166% regexp.s 60411 60399 -12 -0.020% bufio [cmd/compile].s 21512 21544 +32 +0.149% encoding/binary.s 34062 34087 +25 +0.073% regexp/syntax [cmd/compile].s 81719 81781 +62 +0.076% encoding/base64.s 11907 11903 -4 -0.034% time [cmd/compile].s 94341 94184 -157 -0.166% index/suffixarray.s 41633 41527 -106 -0.255% os.s 101770 101738 -32 -0.031% regexp [cmd/compile].s 60411 60399 -12 -0.020% encoding/binary [cmd/compile].s 37173 37198 +25 +0.067% encoding/base64 [cmd/compile].s 11907 11903 -4 -0.034% os/exec.s 23900 23907 +7 +0.029% encoding/hex.s 6038 6030 -8 -0.132% crypto/des.s 5073 5056 -17 -0.335% os [cmd/compile].s 102030 101998 -32 -0.031% vendor/golang.org/x/net/http2/hpack.s 22027 22033 +6 +0.027% math/big.s 164808 164753 -55 -0.033% cmd/vendor/golang.org/x/sys/unix.s 121450 121444 -6 -0.005% encoding/json.s 110294 110287 -7 -0.006% testing.s 115303 115281 -22 -0.019% archive/zip.s 65329 65325 -4 -0.006% os/user.s 10078 10080 +2 +0.020% encoding/gob.s 143788 143783 -5 -0.003% crypto/elliptic.s 30686 30704 +18 +0.059% go/doc/comment.s 49401 49433 +32 +0.065% debug/buildinfo.s 9095 9085 -10 -0.110% image/png.s 36113 36081 -32 -0.089% archive/tar.s 71994 71897 -97 -0.135% crypto/internal/cryptotest.s 60872 60849 -23 -0.038% internal/pkgbits.s 20441 20429 -12 -0.059% testing/quick.s 8236 8235 -1 -0.012% log/slog.s 77568 77558 -10 -0.013% internal/trace/internal/oldtrace.s 52885 52896 +11 +0.021% runtime/pprof.s 123978 123969 -9 -0.007% internal/coverage/cfile.s 25198 25184 -14 -0.056% cmd/internal/objabi.s 19954 19946 -8 -0.040% crypto/ecdsa.s 29159 29141 -18 -0.062% log/slog/internal/benchmarks.s 6694 6695 +1 +0.015% net.s 299569 299503 -66 -0.022% os/exec [cmd/compile].s 23888 23895 +7 +0.029% internal/trace.s 179226 179240 +14 +0.008% internal/fuzz.s 86190 86191 +1 +0.001% crypto/x509.s 177195 177164 -31 -0.017% cmd/internal/obj/s390x.s 121642 121610 -32 -0.026% cmd/internal/obj/ppc64.s 140118 140122 +4 +0.003% encoding/hex [cmd/compile].s 6149 6141 -8 -0.130% cmd/internal/objabi [cmd/compile].s 19954 19946 -8 -0.040% cmd/internal/obj/arm64.s 158523 158555 +32 +0.020% go/doc/comment [cmd/compile].s 49512 49544 +32 +0.065% math/big [cmd/compile].s 166394 166339 -55 -0.033% encoding/json [cmd/compile].s 110712 110705 -7 -0.006% cmd/covdata.s 39699 39687 -12 -0.030% runtime/pprof [cmd/compile].s 125209 125200 -9 -0.007% cmd/compile/internal/syntax.s 181755 181736 -19 -0.010% cmd/dist.s 177893 177861 -32 -0.018% crypto/tls.s 389157 389113 -44 -0.011% internal/pkgbits [cmd/compile].s 41644 41632 -12 -0.029% cmd/compile/internal/syntax [cmd/compile].s 196105 196086 -19 -0.010% cmd/compile/internal/types.s 71315 71345 +30 +0.042% cmd/internal/obj/s390x [cmd/compile].s 121733 121701 -32 -0.026% cmd/go/internal/trace.s 4796 4760 -36 -0.751% cmd/internal/obj/arm64 [cmd/compile].s 168120 168147 +27 +0.016% cmd/internal/obj/ppc64 [cmd/compile].s 140219 140223 +4 +0.003% cmd/internal/script.s 83442 83436 -6 -0.007% cmd/link/internal/loader.s 93299 93294 -5 -0.005% net/http.s 620639 620472 -167 -0.027% net/http/pprof.s 35016 35013 -3 -0.009% cmd/compile/internal/coverage.s 6668 6667 -1 -0.015% cmd/vendor/golang.org/x/telemetry/internal/upload.s 34210 34148 -62 -0.181% cmd/compile/internal/coverage [cmd/compile].s 6664 6663 -1 -0.015% cmd/vendor/golang.org/x/text/language.s 48077 48074 -3 -0.006% cmd/go/internal/vcweb.s 45193 45189 -4 -0.009% cmd/go/internal/vcs.s 44749 44729 -20 -0.045% cmd/compile/internal/inline/inlheur.s 83758 83742 -16 -0.019% cmd/compile/internal/inline/inlheur [cmd/compile].s 84773 84757 -16 -0.019% cmd/go/internal/modfetch/codehost.s 89098 89094 -4 -0.004% cmd/trace.s 257550 257564 +14 +0.005% cmd/link/internal/ld.s 641945 641706 -239 -0.037% cmd/link/internal/arm64.s 34805 34798 -7 -0.020% cmd/go/internal/modload.s 328971 328954 -17 -0.005% cmd/go/internal/load.s 178877 178871 -6 -0.003% cmd/go/internal/clean.s 11006 10990 -16 -0.145% cmd/compile/internal/ssa.s 3552843 3553347 +504 +0.014% cmd/compile/internal/ssa [cmd/compile].s 3752511 3753123 +612 +0.016% total 36179015 36178687 -328 -0.001% Change-Id: I251c2898ccf3c9931d162d87dabbd49cf4ec73a5 Reviewed-on: https://go-review.googlesource.com/c/go/+/641757 Reviewed-by: Keith Randall Auto-Submit: Keith Randall Reviewed-by: Keith Randall Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index 1a32c26ae2..ba7f181f5e 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -664,9 +664,11 @@ // Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules // and further combining shifts. (BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x) +(BT(Q|L)const [c] (ADDQ x x)) && c>1 => (BT(Q|L)const [c-1] x) (BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x) (BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x) (BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x) +(BTLconst [c] (ADDL x x)) && c>1 => (BTLconst [c-1] x) (BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x) (BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x) @@ -702,11 +704,11 @@ // We thus special-case them, by detecting the shift patterns. // Special case resetting first/last bit -(SHL(L|Q)const [1] (SHR(L|Q)const [1] x)) +(ADD(L|Q) (SHR(L|Q)const [1] x) (SHR(L|Q)const [1] x)) => (AND(L|Q)const [-2] x) -(SHRLconst [1] (SHLLconst [1] x)) +(SHRLconst [1] (ADDL x x)) => (ANDLconst [0x7fffffff] x) -(SHRQconst [1] (SHLQconst [1] x)) +(SHRQconst [1] (ADDQ x x)) => (BTRQconst [63] x) // Special case testing first/last bit (with double-shift generated by generic.rules) @@ -933,17 +935,19 @@ (MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) => (SHL(Q|L)const [int8(log32(c/5))] (LEA(Q|L)4 x x)) (MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) => (SHL(Q|L)const [int8(log32(c/9))] (LEA(Q|L)8 x x)) +// Prefer addition when shifting left by one +(SHL(Q|L)const [1] x) => (ADD(Q|L) x x) + // combine add/shift into LEAQ/LEAL (ADD(L|Q) x (SHL(L|Q)const [3] y)) => (LEA(L|Q)8 x y) (ADD(L|Q) x (SHL(L|Q)const [2] y)) => (LEA(L|Q)4 x y) -(ADD(L|Q) x (SHL(L|Q)const [1] y)) => (LEA(L|Q)2 x y) (ADD(L|Q) x (ADD(L|Q) y y)) => (LEA(L|Q)2 x y) (ADD(L|Q) x (ADD(L|Q) x y)) => (LEA(L|Q)2 y x) // combine ADDQ/ADDQconst into LEAQ1/LEAL1 (ADD(Q|L)const [c] (ADD(Q|L) x y)) => (LEA(Q|L)1 [c] x y) (ADD(Q|L) (ADD(Q|L)const [c] x) y) => (LEA(Q|L)1 [c] x y) -(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) => (LEA(Q|L)1 [c] x x) +(ADD(Q|L)const [c] (ADD(Q|L) x x)) => (LEA(Q|L)1 [c] x x) // fold ADDQ/ADDL into LEAQ/LEAL (ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x) @@ -965,12 +969,18 @@ (LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEA(Q|L)8 [c+8*d] {s} x y) // fold shifts into LEAQx/LEALx -(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)2 [c] {s} x y) +(LEA(Q|L)1 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)2 [c] {s} x y) (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)4 [c] {s} x y) (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) => (LEA(Q|L)8 [c] {s} x y) -(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)4 [c] {s} x y) +(LEA(Q|L)2 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)4 [c] {s} x y) (LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)8 [c] {s} x y) -(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)8 [c] {s} x y) +(LEA(Q|L)4 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)8 [c] {s} x y) + +// (x + x) << 1 -> x << 2 +(LEA(Q|L)2 [0] {s} (ADD(Q|L) x x) x) && s == nil => (SHL(Q|L)const [2] x) + +// (x + x) << 2 -> x << 3 and similar +(SHL(Q|L)const [c] (ADD(Q|L) x x)) => (SHL(Q|L)const [c+1] x) // reverse ordering of compare instruction (SETL (InvertFlags x)) => (SETG x) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 7dc0a7bdc2..28041ea76d 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1261,6 +1261,21 @@ func rewriteValueAMD64_OpAMD64ADCQconst(v *Value) bool { func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + // match: (ADDL (SHRLconst [1] x) (SHRLconst [1] x)) + // result: (ANDLconst [-2] x) + for { + if v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 { + break + } + x := v_0.Args[0] + if v_1.Op != OpAMD64SHRLconst || auxIntToInt8(v_1.AuxInt) != 1 || x != v_1.Args[0] { + break + } + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(-2) + v.AddArg(x) + return true + } // match: (ADDL x (MOVLconst [c])) // result: (ADDLconst [c] x) for { @@ -1307,21 +1322,6 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool { } break } - // match: (ADDL x (SHLLconst [1] y)) - // result: (LEAL2 x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 { - continue - } - y := v_1.Args[0] - v.reset(OpAMD64LEAL2) - v.AddArg2(x, y) - return true - } - break - } // match: (ADDL x (ADDL y y)) // result: (LEAL2 x y) for { @@ -1461,14 +1461,17 @@ func rewriteValueAMD64_OpAMD64ADDLconst(v *Value) bool { v.AddArg2(x, y) return true } - // match: (ADDLconst [c] (SHLLconst [1] x)) + // match: (ADDLconst [c] (ADDL x x)) // result: (LEAL1 [c] x x) for { c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 { + if v_0.Op != OpAMD64ADDL { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] { break } - x := v_0.Args[0] v.reset(OpAMD64LEAL1) v.AuxInt = int32ToAuxInt(c) v.AddArg2(x, x) @@ -1806,6 +1809,21 @@ func rewriteValueAMD64_OpAMD64ADDLmodify(v *Value) bool { func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + // match: (ADDQ (SHRQconst [1] x) (SHRQconst [1] x)) + // result: (ANDQconst [-2] x) + for { + if v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 { + break + } + x := v_0.Args[0] + if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 1 || x != v_1.Args[0] { + break + } + v.reset(OpAMD64ANDQconst) + v.AuxInt = int32ToAuxInt(-2) + v.AddArg(x) + return true + } // match: (ADDQ x (MOVQconst [c])) // cond: is32Bit(c) && !t.IsPtr() // result: (ADDQconst [int32(c)] x) @@ -1873,21 +1891,6 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool { } break } - // match: (ADDQ x (SHLQconst [1] y)) - // result: (LEAQ2 x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 { - continue - } - y := v_1.Args[0] - v.reset(OpAMD64LEAQ2) - v.AddArg2(x, y) - return true - } - break - } // match: (ADDQ x (ADDQ y y)) // result: (LEAQ2 x y) for { @@ -2052,14 +2055,17 @@ func rewriteValueAMD64_OpAMD64ADDQconst(v *Value) bool { v.AddArg2(x, y) return true } - // match: (ADDQconst [c] (SHLQconst [1] x)) + // match: (ADDQconst [c] (ADDQ x x)) // result: (LEAQ1 [c] x x) for { c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 { + if v_0.Op != OpAMD64ADDQ { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] { break } - x := v_0.Args[0] v.reset(OpAMD64LEAQ1) v.AuxInt = int32ToAuxInt(c) v.AddArg2(x, x) @@ -3637,6 +3643,23 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool { v.AddArg(x) return true } + // match: (BTLconst [c] (ADDQ x x)) + // cond: c>1 + // result: (BTLconst [c-1] x) + for { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64ADDQ { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] || !(c > 1) { + break + } + v.reset(OpAMD64BTLconst) + v.AuxInt = int8ToAuxInt(c - 1) + v.AddArg(x) + return true + } // match: (BTLconst [c] (SHLQconst [d] x)) // cond: c>d // result: (BTLconst [c-d] x) @@ -3689,6 +3712,23 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool { v.AddArg(x) return true } + // match: (BTLconst [c] (ADDL x x)) + // cond: c>1 + // result: (BTLconst [c-1] x) + for { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64ADDL { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] || !(c > 1) { + break + } + v.reset(OpAMD64BTLconst) + v.AuxInt = int8ToAuxInt(c - 1) + v.AddArg(x) + return true + } // match: (BTLconst [c] (SHLLconst [d] x)) // cond: c>d // result: (BTLconst [c-d] x) @@ -3761,6 +3801,23 @@ func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool { v.AddArg(x) return true } + // match: (BTQconst [c] (ADDQ x x)) + // cond: c>1 + // result: (BTQconst [c-1] x) + for { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64ADDQ { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] || !(c > 1) { + break + } + v.reset(OpAMD64BTQconst) + v.AuxInt = int8ToAuxInt(c - 1) + v.AddArg(x) + return true + } // match: (BTQconst [c] (SHLQconst [d] x)) // cond: c>d // result: (BTQconst [c-d] x) @@ -8287,17 +8344,20 @@ func rewriteValueAMD64_OpAMD64LEAL1(v *Value) bool { } break } - // match: (LEAL1 [c] {s} x (SHLLconst [1] y)) + // match: (LEAL1 [c] {s} x (ADDL y y)) // result: (LEAL2 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) s := auxToSym(v.Aux) for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 { + if v_1.Op != OpAMD64ADDL { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { continue } - y := v_1.Args[0] v.reset(OpAMD64LEAL2) v.AuxInt = int32ToAuxInt(c) v.Aux = symToAux(s) @@ -8391,16 +8451,19 @@ func rewriteValueAMD64_OpAMD64LEAL2(v *Value) bool { v.AddArg2(x, y) return true } - // match: (LEAL2 [c] {s} x (SHLLconst [1] y)) + // match: (LEAL2 [c] {s} x (ADDL y y)) // result: (LEAL4 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) s := auxToSym(v.Aux) x := v_0 - if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 { + if v_1.Op != OpAMD64ADDL { + break + } + y := v_1.Args[1] + if y != v_1.Args[0] { break } - y := v_1.Args[0] v.reset(OpAMD64LEAL4) v.AuxInt = int32ToAuxInt(c) v.Aux = symToAux(s) @@ -8423,6 +8486,26 @@ func rewriteValueAMD64_OpAMD64LEAL2(v *Value) bool { v.AddArg2(x, y) return true } + // match: (LEAL2 [0] {s} (ADDL x x) x) + // cond: s == nil + // result: (SHLLconst [2] x) + for { + if auxIntToInt32(v.AuxInt) != 0 { + break + } + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDL { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] || x != v_1 || !(s == nil) { + break + } + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) + return true + } return false } func rewriteValueAMD64_OpAMD64LEAL4(v *Value) bool { @@ -8470,16 +8553,19 @@ func rewriteValueAMD64_OpAMD64LEAL4(v *Value) bool { v.AddArg2(x, y) return true } - // match: (LEAL4 [c] {s} x (SHLLconst [1] y)) + // match: (LEAL4 [c] {s} x (ADDL y y)) // result: (LEAL8 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) s := auxToSym(v.Aux) x := v_0 - if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 { + if v_1.Op != OpAMD64ADDL { + break + } + y := v_1.Args[1] + if y != v_1.Args[0] { break } - y := v_1.Args[0] v.reset(OpAMD64LEAL8) v.AuxInt = int32ToAuxInt(c) v.Aux = symToAux(s) @@ -8721,17 +8807,20 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool { } break } - // match: (LEAQ1 [c] {s} x (SHLQconst [1] y)) + // match: (LEAQ1 [c] {s} x (ADDQ y y)) // result: (LEAQ2 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) s := auxToSym(v.Aux) for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 { + if v_1.Op != OpAMD64ADDQ { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { continue } - y := v_1.Args[0] v.reset(OpAMD64LEAQ2) v.AuxInt = int32ToAuxInt(c) v.Aux = symToAux(s) @@ -8924,16 +9013,19 @@ func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool { v.AddArg2(x, y) return true } - // match: (LEAQ2 [c] {s} x (SHLQconst [1] y)) + // match: (LEAQ2 [c] {s} x (ADDQ y y)) // result: (LEAQ4 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) s := auxToSym(v.Aux) x := v_0 - if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 { + if v_1.Op != OpAMD64ADDQ { + break + } + y := v_1.Args[1] + if y != v_1.Args[0] { break } - y := v_1.Args[0] v.reset(OpAMD64LEAQ4) v.AuxInt = int32ToAuxInt(c) v.Aux = symToAux(s) @@ -8956,6 +9048,26 @@ func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool { v.AddArg2(x, y) return true } + // match: (LEAQ2 [0] {s} (ADDQ x x) x) + // cond: s == nil + // result: (SHLQconst [2] x) + for { + if auxIntToInt32(v.AuxInt) != 0 { + break + } + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQ { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] || x != v_1 || !(s == nil) { + break + } + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) + return true + } // match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y) @@ -9087,16 +9199,19 @@ func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool { v.AddArg2(x, y) return true } - // match: (LEAQ4 [c] {s} x (SHLQconst [1] y)) + // match: (LEAQ4 [c] {s} x (ADDQ y y)) // result: (LEAQ8 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) s := auxToSym(v.Aux) x := v_0 - if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 { + if v_1.Op != OpAMD64ADDQ { + break + } + y := v_1.Args[1] + if y != v_1.Args[0] { break } - y := v_1.Args[0] v.reset(OpAMD64LEAQ8) v.AuxInt = int32ToAuxInt(c) v.Aux = symToAux(s) @@ -20736,26 +20851,41 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { } func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool { v_0 := v.Args[0] - // match: (SHLLconst [1] (SHRLconst [1] x)) - // result: (ANDLconst [-2] x) + // match: (SHLLconst x [0]) + // result: x for { - if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 { + if auxIntToInt8(v.AuxInt) != 0 { break } - x := v_0.Args[0] - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(-2) - v.AddArg(x) + x := v_0 + v.copyOf(x) return true } - // match: (SHLLconst x [0]) - // result: x + // match: (SHLLconst [1] x) + // result: (ADDL x x) for { - if auxIntToInt8(v.AuxInt) != 0 { + if auxIntToInt8(v.AuxInt) != 1 { break } x := v_0 - v.copyOf(x) + v.reset(OpAMD64ADDL) + v.AddArg2(x, x) + return true + } + // match: (SHLLconst [c] (ADDL x x)) + // result: (SHLLconst [c+1] x) + for { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64ADDL { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] { + break + } + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(c + 1) + v.AddArg(x) return true } // match: (SHLLconst [d] (MOVLconst [c])) @@ -20992,26 +21122,41 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { } func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool { v_0 := v.Args[0] - // match: (SHLQconst [1] (SHRQconst [1] x)) - // result: (ANDQconst [-2] x) + // match: (SHLQconst x [0]) + // result: x for { - if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 { + if auxIntToInt8(v.AuxInt) != 0 { break } - x := v_0.Args[0] - v.reset(OpAMD64ANDQconst) - v.AuxInt = int32ToAuxInt(-2) - v.AddArg(x) + x := v_0 + v.copyOf(x) return true } - // match: (SHLQconst x [0]) - // result: x + // match: (SHLQconst [1] x) + // result: (ADDQ x x) for { - if auxIntToInt8(v.AuxInt) != 0 { + if auxIntToInt8(v.AuxInt) != 1 { break } x := v_0 - v.copyOf(x) + v.reset(OpAMD64ADDQ) + v.AddArg2(x, x) + return true + } + // match: (SHLQconst [c] (ADDQ x x)) + // result: (SHLQconst [c+1] x) + for { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64ADDQ { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] { + break + } + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(c + 1) + v.AddArg(x) return true } // match: (SHLQconst [d] (MOVQconst [c])) @@ -21419,13 +21564,16 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool { } func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool { v_0 := v.Args[0] - // match: (SHRLconst [1] (SHLLconst [1] x)) + // match: (SHRLconst [1] (ADDL x x)) // result: (ANDLconst [0x7fffffff] x) for { - if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 { + if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDL { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] { break } - x := v_0.Args[0] v.reset(OpAMD64ANDLconst) v.AuxInt = int32ToAuxInt(0x7fffffff) v.AddArg(x) @@ -21663,13 +21811,16 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool { } func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool { v_0 := v.Args[0] - // match: (SHRQconst [1] (SHLQconst [1] x)) + // match: (SHRQconst [1] (ADDQ x x)) // result: (BTRQconst [63] x) for { - if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 { + if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDQ { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] { break } - x := v_0.Args[0] v.reset(OpAMD64BTRQconst) v.AuxInt = int8ToAuxInt(63) v.AddArg(x) diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 4b47f6c13d..063055053e 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -185,6 +185,15 @@ func Pow2Muls(n1, n2 int) (int, int) { return a, b } +func Mul_2(n1 int32, n2 int64) (int32, int64) { + // amd64:"ADDL", -"SHLL" + a := n1 * 2 + // amd64:"ADDQ", -"SHLQ" + b := n2 * 2 + + return a, b +} + func Mul_96(n int) int { // amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ` // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL` diff --git a/test/codegen/bits.go b/test/codegen/bits.go index 354dbf407a..c20e4d6733 100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -120,6 +120,16 @@ func bitoff64(a, b uint64) (n uint64) { return n } +func clearLastBit(x int64, y int32) (int64, int32) { + // amd64:"ANDQ\t[$]-2" + a := (x >> 1) << 1 + + // amd64:"ANDL\t[$]-2" + b := (y >> 1) << 1 + + return a, b +} + func bitcompl64(a, b uint64) (n uint64) { // amd64:"BTCQ" n += b ^ (1 << (a & 63)) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index 2d8cf86857..52efefb0ed 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -58,6 +58,16 @@ func rshConst64x64Overflow8(v int8) int64 { return int64(v) >> 8 } +func lshConst32x1(v int32) int32 { + // amd64:"ADDL", -"SHLL" + return v << 1 +} + +func lshConst64x1(v int64) int64 { + // amd64:"ADDQ", -"SHLQ" + return v << 1 +} + func lshConst32x64(v int32) int32 { // ppc64x:"SLW" // riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW" @@ -94,6 +104,26 @@ func rshConst64x32(v int64) int64 { return v >> uint32(33) } +func lshConst32x1Add(x int32) int32 { + // amd64:"SHLL\t[$]2" + return (x + x) << 1 +} + +func lshConst64x1Add(x int64) int64 { + // amd64:"SHLQ\t[$]2" + return (x + x) << 1 +} + +func lshConst32x2Add(x int32) int32 { + // amd64:"SHLL\t[$]3" + return (x + x) << 2 +} + +func lshConst64x2Add(x int64) int64 { + // amd64:"SHLQ\t[$]3" + return (x + x) << 2 +} + // ------------------ // // masked shifts // // ------------------ //