From: Jakub Ciolek
Date: Wed, 21 May 2025 07:34:48 +0000 (+0200)
Subject: cmd/compile: make dse track multiple shadowed ranges
X-Git-Tag: go1.26rc1~205
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=d474f1fd21fde4a508235c48caf0acca6ce73642;p=gostls13.git

cmd/compile: make dse track multiple shadowed ranges

Track multiple shadowed ranges when doing DSE.

Elides zeroing for:

func nozero() (b [5]int64) {
	b[0] = 1
	b[1] = 7
	b[3] = 1
	b[4] = 1
	b[2] = 0
	return b
}

goes from:

v28 00003 (7) LEAQ main.b(SP), AX
v5 00004 (7) MOVUPS X15, (AX)
v5 00005 (7) MOVUPS X15, 16(AX)
v5 00006 (7) MOVUPS X15, 24(AX)
v11 00007 (8) MOVQ $1, main.b(SP)
v16 00008 (9) MOVQ $7, main.b+8(SP)
v20 00009 (10) MOVQ $1, main.b+24(SP)
v24 00010 (11) MOVQ $1, main.b+32(SP)
v29 00011 (12) MOVQ $0, main.b+16(SP)
b1 00012 (13) RET
00013 (?) END

to:

v11 00003 (8) MOVQ $1, main.b(SP)
v16 00004 (9) MOVQ $7, main.b+8(SP)
v20 00005 (10) MOVQ $1, main.b+24(SP)
v24 00006 (11) MOVQ $1, main.b+32(SP)
v29 00007 (12) MOVQ $0, main.b+16(SP)
b1 00008 (13) RET
00009 (?) 
END regexp linux/amd64: Find-16 89.17n ± ∞ ¹ 83.09n ± ∞ ¹ -6.82% (p=0.008 n=5) FindAllNoMatches-16 46.23n ± ∞ ¹ 44.26n ± ∞ ¹ -4.26% (p=0.008 n=5) FindString-16 89.77n ± ∞ ¹ 82.84n ± ∞ ¹ -7.72% (p=0.008 n=5) FindSubmatch-16 108.9n ± ∞ ¹ 101.6n ± ∞ ¹ -6.70% (p=0.008 n=5) FindStringSubmatch-16 103.10n ± ∞ ¹ 99.98n ± ∞ ¹ -3.03% (p=0.008 n=5) Literal-16 29.61n ± ∞ ¹ 29.17n ± ∞ ¹ -1.49% (p=0.008 n=5) NotLiteral-16 590.0n ± ∞ ¹ 566.0n ± ∞ ¹ -4.07% (p=0.008 n=5) MatchClass-16 894.7n ± ∞ ¹ 814.0n ± ∞ ¹ -9.02% (p=0.008 n=5) MatchClass_InRange-16 793.0n ± ∞ ¹ 756.3n ± ∞ ¹ -4.63% (p=0.008 n=5) ReplaceAll-16 513.9n ± ∞ ¹ 503.6n ± ∞ ¹ -2.00% (p=0.008 n=5) AnchoredLiteralShortNonMatch-16 21.70n ± ∞ ¹ 21.66n ± ∞ ¹ ~ (p=0.738 n=5) AnchoredLiteralLongNonMatch-16 21.74n ± ∞ ¹ 21.65n ± ∞ ¹ ~ (p=0.286 n=5) AnchoredShortMatch-16 37.71n ± ∞ ¹ 37.63n ± ∞ ¹ ~ (p=0.421 n=5) AnchoredLongMatch-16 37.75n ± ∞ ¹ 37.70n ± ∞ ¹ ~ (p=0.286 n=5) OnePassShortA-16 188.7n ± ∞ ¹ 185.7n ± ∞ ¹ -1.59% (p=0.008 n=5) NotOnePassShortA-16 188.0n ± ∞ ¹ 190.7n ± ∞ ¹ +1.44% (p=0.008 n=5) OnePassShortB-16 147.9n ± ∞ ¹ 154.2n ± ∞ ¹ +4.26% (p=0.008 n=5) NotOnePassShortB-16 141.2n ± ∞ ¹ 144.1n ± ∞ ¹ +2.05% (p=0.008 n=5) OnePassLongPrefix-16 40.43n ± ∞ ¹ 38.45n ± ∞ ¹ -4.90% (p=0.008 n=5) OnePassLongNotPrefix-16 100.6n ± ∞ ¹ 102.5n ± ∞ ¹ +1.89% (p=0.008 n=5) MatchParallelShared-16 9.666n ± ∞ ¹ 9.461n ± ∞ ¹ ~ (p=0.056 n=5) MatchParallelCopied-16 9.530n ± ∞ ¹ 9.540n ± ∞ ¹ ~ (p=0.841 n=5) QuoteMetaAll-16 28.60n ± ∞ ¹ 29.52n ± ∞ ¹ +3.22% (p=0.016 n=5) QuoteMetaNone-16 16.73n ± ∞ ¹ 16.74n ± ∞ ¹ ~ (p=0.817 n=5) Compile/Onepass-16 2.040µ ± ∞ ¹ 2.012µ ± ∞ ¹ ~ (p=0.381 n=5) Compile/Medium-16 4.652µ ± ∞ ¹ 4.661µ ± ∞ ¹ ~ (p=0.341 n=5) Compile/Hard-16 37.59µ ± ∞ ¹ 37.93µ ± ∞ ¹ ~ (p=0.222 n=5) Match/Easy0/16-16 1.848n ± ∞ ¹ 1.847n ± ∞ ¹ -0.05% (p=0.048 n=5) Match/Easy0/32-16 23.81n ± ∞ ¹ 24.16n ± ∞ ¹ +1.47% (p=0.008 n=5) Match/Easy0/1K-16 143.2n ± ∞ ¹ 148.6n ± ∞ ¹ +3.77% (p=0.008 n=5) Match/Easy0/32K-16 2.023µ ± ∞ ¹ 2.008µ ± ∞ ¹ 
-0.74% (p=0.024 n=5) Match/Easy0/1M-16 135.3µ ± ∞ ¹ 136.8µ ± ∞ ¹ +1.10% (p=0.016 n=5) Match/Easy0/32M-16 5.139m ± ∞ ¹ 5.123m ± ∞ ¹ -0.29% (p=0.008 n=5) Match/Easy0i/16-16 1.848n ± ∞ ¹ 1.847n ± ∞ ¹ ~ (p=0.167 n=5) Match/Easy0i/32-16 438.3n ± ∞ ¹ 421.9n ± ∞ ¹ -3.74% (p=0.008 n=5) Match/Easy0i/1K-16 12.93µ ± ∞ ¹ 12.25µ ± ∞ ¹ -5.25% (p=0.008 n=5) Match/Easy0i/32K-16 443.3µ ± ∞ ¹ 450.6µ ± ∞ ¹ +1.64% (p=0.008 n=5) Match/Easy0i/1M-16 14.26m ± ∞ ¹ 14.44m ± ∞ ¹ ~ (p=0.222 n=5) Match/Easy0i/32M-16 454.8m ± ∞ ¹ 459.0m ± ∞ ¹ ~ (p=0.056 n=5) Match/Easy1/16-16 1.848n ± ∞ ¹ 1.847n ± ∞ ¹ ~ (p=0.206 n=5) Match/Easy1/32-16 20.78n ± ∞ ¹ 20.95n ± ∞ ¹ ~ (p=0.841 n=5) Match/Easy1/1K-16 292.0n ± ∞ ¹ 278.6n ± ∞ ¹ -4.59% (p=0.008 n=5) Match/Easy1/32K-16 14.19µ ± ∞ ¹ 14.31µ ± ∞ ¹ +0.85% (p=0.008 n=5) Match/Easy1/1M-16 513.3µ ± ∞ ¹ 517.0µ ± ∞ ¹ +0.72% (p=0.008 n=5) Match/Easy1/32M-16 16.58m ± ∞ ¹ 16.69m ± ∞ ¹ +0.64% (p=0.008 n=5) Match/Medium/16-16 1.849n ± ∞ ¹ 1.847n ± ∞ ¹ -0.11% (p=0.024 n=5) Match/Medium/32-16 319.8n ± ∞ ¹ 312.1n ± ∞ ¹ -2.41% (p=0.008 n=5) Match/Medium/1K-16 12.54µ ± ∞ ¹ 12.66µ ± ∞ ¹ +0.97% (p=0.008 n=5) Match/Medium/32K-16 496.9µ ± ∞ ¹ 496.0µ ± ∞ ¹ ~ (p=0.056 n=5) Match/Medium/1M-16 16.11m ± ∞ ¹ 16.00m ± ∞ ¹ -0.63% (p=0.032 n=5) Match/Medium/32M-16 516.5m ± ∞ ¹ 513.1m ± ∞ ¹ -0.65% (p=0.032 n=5) Match/Hard/16-16 1.848n ± ∞ ¹ 1.847n ± ∞ ¹ ~ (p=0.238 n=5) Match/Hard/32-16 527.2n ± ∞ ¹ 508.4n ± ∞ ¹ -3.57% (p=0.008 n=5) Match/Hard/1K-16 15.53µ ± ∞ ¹ 15.42µ ± ∞ ¹ -0.68% (p=0.008 n=5) Match/Hard/32K-16 636.5µ ± ∞ ¹ 665.8µ ± ∞ ¹ +4.62% (p=0.008 n=5) Match/Hard/1M-16 20.45m ± ∞ ¹ 21.13m ± ∞ ¹ +3.30% (p=0.008 n=5) Match/Hard/32M-16 654.5m ± ∞ ¹ 671.9m ± ∞ ¹ +2.66% (p=0.008 n=5) Match/Hard1/16-16 1.538µ ± ∞ ¹ 1.499µ ± ∞ ¹ -2.54% (p=0.008 n=5) Match/Hard1/32-16 2.965µ ± ∞ ¹ 2.906µ ± ∞ ¹ -1.99% (p=0.008 n=5) Match/Hard1/1K-16 91.28µ ± ∞ ¹ 90.09µ ± ∞ ¹ -1.29% (p=0.008 n=5) Match/Hard1/32K-16 2.996m ± ∞ ¹ 3.311m ± ∞ ¹ +10.50% (p=0.008 n=5) Match/Hard1/1M-16 95.77m ± ∞ ¹ 105.87m ± ∞ ¹ 
+10.54% (p=0.008 n=5) Match/Hard1/32M-16 3.069 ± ∞ ¹ 3.399 ± ∞ ¹ +10.74% (p=0.008 n=5) Match_onepass_regex/16-16 120.9n ± ∞ ¹ 117.4n ± ∞ ¹ -2.89% (p=0.008 n=5) Match_onepass_regex/32-16 211.8n ± ∞ ¹ 207.5n ± ∞ ¹ -2.03% (p=0.008 n=5) Match_onepass_regex/1K-16 5.602µ ± ∞ ¹ 5.548µ ± ∞ ¹ ~ (p=0.421 n=5) Match_onepass_regex/32K-16 185.6µ ± ∞ ¹ 185.1µ ± ∞ ¹ ~ (p=0.690 n=5) Match_onepass_regex/1M-16 5.896m ± ∞ ¹ 5.808m ± ∞ ¹ -1.50% (p=0.016 n=5) Match_onepass_regex/32M-16 193.4m ± ∞ ¹ 185.5m ± ∞ ¹ -4.10% (p=0.008 n=5) geomean 3.815µ 3.796µ -0.51% compilecmp linux/amd64: file before after Δ % runtime.s 673118 672419 -699 -0.104% runtime [cmd/compile].s 720475 719781 -694 -0.096% math/rand/v2.s 9394 9371 -23 -0.245% bytes.s 36026 35999 -27 -0.075% vendor/golang.org/x/net/dns/dnsmessage.s 76433 76193 -240 -0.314% math/rand/v2 [cmd/compile].s 9394 9371 -23 -0.245% strings.s 43435 43414 -21 -0.048% syscall.s 82215 82183 -32 -0.039% html.s 6010 5949 -61 -1.015% bytes [cmd/compile].s 36615 36588 -27 -0.074% regexp/syntax.s 81442 81299 -143 -0.176% syscall [cmd/compile].s 82215 82183 -32 -0.039% time.s 90555 90507 -48 -0.053% regexp.s 58974 58876 -98 -0.166% reflect.s 176893 176829 -64 -0.036% context.s 14298 14234 -64 -0.448% plugin.s 3879 3847 -32 -0.825% io/fs.s 29026 29009 -17 -0.059% strings [cmd/compile].s 43446 43425 -21 -0.048% html [cmd/compile].s 6010 5949 -61 -1.015% time [cmd/compile].s 90555 90507 -48 -0.053% os.s 116321 116249 -72 -0.062% regexp/syntax [cmd/compile].s 81442 81299 -143 -0.176% context [cmd/compile].s 14298 14234 -64 -0.448% io/fs [cmd/compile].s 29026 29009 -17 -0.059% path/filepath.s 19879 19842 -37 -0.186% cmd/cgo/internal/cgotest.s 1965 1932 -33 -1.679% reflect [cmd/compile].s 176893 176829 -64 -0.036% regexp [cmd/compile].s 58974 58876 -98 -0.166% crypto/cipher.s 21706 21660 -46 -0.212% runtime/trace.s 14644 14634 -10 -0.068% math/big.s 170782 170250 -532 -0.312% debug/dwarf.s 105214 105141 -73 -0.069% log.s 15749 15603 -146 -0.927% 
encoding/json.s 118965 118933 -32 -0.027% os/user.s 10367 10326 -41 -0.395% crypto/dsa.s 4988 4974 -14 -0.281% crypto/rsa.s 29486 29474 -12 -0.041% database/sql.s 99574 99403 -171 -0.172% encoding/gob.s 146507 146475 -32 -0.022% debug/macho.s 29517 29421 -96 -0.325% crypto/ed25519.s 8648 8594 -54 -0.624% internal/goroot.s 3165 3107 -58 -1.833% testing.s 123472 123438 -34 -0.028% archive/tar.s 71179 71096 -83 -0.117% go/doc/comment.s 48429 48397 -32 -0.066% vendor/golang.org/x/crypto/cryptobyte.s 31717 31690 -27 -0.085% internal/cgrouptest.s 4760 4686 -74 -1.555% image/png.s 34484 34479 -5 -0.014% go/constant.s 29502 29297 -205 -0.695% internal/testenv.s 22396 22331 -65 -0.290% internal/pkgbits.s 19609 19598 -11 -0.056% testing/iotest.s 15070 15018 -52 -0.345% internal/runtime/gc/internal/gen.s 50837 50548 -289 -0.568% crypto/internal/cryptotest.s 58607 58229 -378 -0.645% crypto/ecdsa.s 43878 43658 -220 -0.501% cmd/internal/objabi.s 20244 20231 -13 -0.064% math/big/internal/asmgen.s 74554 74422 -132 -0.177% log/slog.s 81620 81617 -3 -0.004% net.s 299158 299080 -78 -0.026% cmd/vendor/golang.org/x/telemetry/internal/telemetry.s 4531 4472 -59 -1.302% testing/fstest.s 73370 73286 -84 -0.114% log/syslog.s 6457 6426 -31 -0.480% vendor/golang.org/x/net/http/httpproxy.s 7674 7666 -8 -0.104% cmd/vendor/golang.org/x/telemetry/internal/counter.s 31504 31493 -11 -0.035% cmd/internal/pkgpath.s 4828 4810 -18 -0.373% internal/trace.s 190495 190463 -32 -0.017% cmd/internal/telemetry/counter.s 1999 1979 -20 -1.001% net/mail.s 21912 21866 -46 -0.210% mime/multipart.s 30856 30806 -50 -0.162% internal/trace/internal/testgen.s 12870 12850 -20 -0.155% go/parser.s 109753 109739 -14 -0.013% crypto/x509.s 184334 183966 -368 -0.200% cmd/internal/pgo.s 7886 7850 -36 -0.457% cmd/internal/browser.s 1980 1962 -18 -0.909% cmd/covdata.s 40197 40180 -17 -0.042% internal/fuzz.s 90255 90234 -21 -0.023% go/build.s 74975 74722 -253 -0.337% cmd/distpack.s 29343 29056 -287 -0.978% cmd/cover.s 53513 53412 
-101 -0.189% cmd/internal/obj.s 144804 144764 -40 -0.028% os [cmd/compile].s 116325 116253 -72 -0.062% cmd/cgo.s 217917 217878 -39 -0.018% internal/exportdata.s 8849 8800 -49 -0.554% cmd/dist.s 179720 179253 -467 -0.260% cmd/compile/internal/syntax.s 174526 174495 -31 -0.018% cmd/asm/internal/lex.s 21635 21628 -7 -0.032% cmd/internal/obj/riscv.s 149150 149118 -32 -0.021% path/filepath [cmd/compile].s 19879 19842 -37 -0.186% cmd/internal/obj/wasm.s 83633 83569 -64 -0.077% crypto/tls.s 405459 405103 -356 -0.088% cmd/internal/obj/loong64.s 117422 117392 -30 -0.026% log [cmd/compile].s 15798 15652 -146 -0.924% crypto/cipher [cmd/compile].s 21706 21660 -46 -0.212% go/types.s 592053 591930 -123 -0.021% cmd/vendor/golang.org/x/telemetry/internal/telemetry [cmd/compile].s 4531 4472 -59 -1.302% cmd/internal/objabi [cmd/compile].s 20244 20231 -13 -0.064% encoding/json [cmd/compile].s 119184 119152 -32 -0.027% go/internal/srcimporter.s 9957 9948 -9 -0.090% internal/goroot [cmd/compile].s 3165 3107 -58 -1.833% runtime/trace [cmd/compile].s 14703 14693 -10 -0.068% go/internal/gccgoimporter.s 47218 47189 -29 -0.061% cmd/vendor/golang.org/x/telemetry/internal/counter [cmd/compile].s 31563 31552 -11 -0.035% go/doc/comment [cmd/compile].s 48488 48456 -32 -0.066% cmd/compile/internal/base.s 44391 44379 -12 -0.027% cmd/vendor/golang.org/x/tools/internal/analysis/analyzerutil.s 3957 3925 -32 -0.809% math/big [cmd/compile].s 173023 172491 -532 -0.307% cmd/asm.s 3824 3749 -75 -1.961% cmd/vendor/golang.org/x/tools/internal/diff/lcs.s 22413 22289 -124 -0.553% cmd/internal/telemetry/counter [cmd/compile].s 1999 1979 -20 -1.001% cmd/vendor/golang.org/x/tools/go/analysis/passes/buildtag.s 8886 8877 -9 -0.101% go/constant [cmd/compile].s 29673 29468 -205 -0.691% cmd/internal/obj [cmd/compile].s 218137 218102 -35 -0.016% cmd/internal/pgo [cmd/compile].s 7945 7909 -36 -0.453% internal/pkgbits [cmd/compile].s 37142 37115 -27 -0.073% go/parser [cmd/compile].s 109812 109798 -14 -0.013% 
cmd/compile/internal/base [cmd/compile].s 44607 44595 -12 -0.027% go/build [cmd/compile].s 75034 74781 -253 -0.337% cmd/vendor/golang.org/x/tools/go/analysis/internal/analysisflags.s 10379 10338 -41 -0.395% cmd/go/internal/lockedfile.s 15477 15473 -4 -0.026% cmd/internal/obj/loong64 [cmd/compile].s 117481 117451 -30 -0.026% cmd/internal/obj/wasm [cmd/compile].s 83677 83613 -64 -0.076% cmd/internal/obj/riscv [cmd/compile].s 148985 148953 -32 -0.021% cmd/vendor/golang.org/x/tools/internal/analysis/driverutil.s 31164 31100 -64 -0.205% internal/exportdata [cmd/compile].s 8849 8800 -49 -0.554% cmd/vendor/golang.org/x/mod/sumdb/dirhash.s 12387 12366 -21 -0.170% cmd/vendor/golang.org/x/tools/internal/typesinternal.s 24320 24295 -25 -0.103% cmd/go/internal/fsys.s 60108 60040 -68 -0.113% net/http.s 603320 602752 -568 -0.094% cmd/compile/internal/syntax [cmd/compile].s 187371 187340 -31 -0.017% cmd/cgo/internal/test.s 219885 219826 -59 -0.027% net/http/httptest.s 21757 21754 -3 -0.014% cmd/compile/internal/types2.s 576035 575871 -164 -0.028% net/http/cgi.s 36196 36146 -50 -0.138% net/http/httputil.s 45557 45502 -55 -0.121% cmd/compile/internal/objw.s 5710 5672 -38 -0.665% net/http/pprof.s 32053 32011 -42 -0.131% internal/trace/traceviewer.s 34748 34695 -53 -0.153% net/rpc.s 44569 44361 -208 -0.467% cmd/compile/internal/staticdata.s 21461 21446 -15 -0.070% cmd/vendor/golang.org/x/telemetry/internal/crashmonitor.s 6104 6073 -31 -0.508% cmd/go/internal/cfg.s 14419 14303 -116 -0.804% cmd/vendor/golang.org/x/tools/go/analysis/passes/hostport.s 6834 6802 -32 -0.468% cmd/vendor/golang.org/x/tools/go/types/objectpath.s 19228 19118 -110 -0.572% cmd/go/internal/imports.s 15978 15970 -8 -0.050% cmd/vendor/golang.org/x/tools/internal/facts.s 15249 15237 -12 -0.079% cmd/vendor/golang.org/x/telemetry/internal/upload.s 34546 33957 -589 -1.705% cmd/vendor/golang.org/x/mod/sumdb.s 28991 28941 -50 -0.172% cmd/vendor/golang.org/x/telemetry.s 7555 7420 -135 -1.787% cmd/gofmt.s 29924 29898 -26 
-0.087% cmd/go/internal/base.s 19950 19938 -12 -0.060% cmd/vendor/golang.org/x/tools/internal/refactor/inline.s 161628 161596 -32 -0.020% cmd/internal/script.s 89932 89811 -121 -0.135% cmd/vendor/golang.org/x/tools/go/analysis/unitchecker.s 14865 14797 -68 -0.457% cmd/vendor/golang.org/x/arch/riscv64/riscv64asm.s 62049 62017 -32 -0.052% cmd/vendor/golang.org/x/mod/zip.s 39525 39428 -97 -0.245% cmd/compile/internal/typecheck.s 170567 170522 -45 -0.026% cmd/go/internal/cache.s 37546 37451 -95 -0.253% cmd/go/internal/gover.s 6733 6726 -7 -0.104% cmd/vendor/golang.org/x/arch/arm/armasm.s 30032 29991 -41 -0.137% cmd/go/internal/auth.s 22485 22385 -100 -0.445% cmd/go/internal/search.s 15362 15262 -100 -0.651% cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize.s 197385 196963 -422 -0.214% cmd/go/internal/doc.s 62764 62590 -174 -0.277% cmd/compile/internal/compare.s 10769 10758 -11 -0.102% cmd/vendor/golang.org/x/arch/ppc64/ppc64asm.s 208027 208005 -22 -0.011% cmd/compile/internal/escape.s 72599 72560 -39 -0.054% cmd/go/internal/mvs.s 30363 30339 -24 -0.079% cmd/go/internal/vcweb.s 51575 51334 -241 -0.467% cmd/go/internal/modindex.s 80742 80444 -298 -0.369% cmd/vendor/golang.org/x/arch/x86/x86asm.s 72057 72004 -53 -0.074% cmd/compile/internal/objw [cmd/compile].s 7408 7345 -63 -0.850% cmd/go/internal/vcs.s 51661 51545 -116 -0.225% cmd/go/internal/modfetch/codehost.s 110786 110689 -97 -0.088% cmd/nm.s 5055 4981 -74 -1.464% cmd/pack.s 8826 8804 -22 -0.249% cmd/compile/internal/types2 [cmd/compile].s 667403 667271 -132 -0.020% cmd/internal/script/scripttest.s 40383 40149 -234 -0.579% cmd/compile/internal/typecheck [cmd/compile].s 196101 196056 -45 -0.023% cmd/preprofile.s 3127 3107 -20 -0.640% cmd/compile/internal/staticdata [cmd/compile].s 31032 31017 -15 -0.048% cmd/go/internal/modfetch.s 134544 134056 -488 -0.363% cmd/vendor/golang.org/x/tools/go/analysis/passes/atomic.s 2433 2408 -25 -1.028% cmd/vendor/github.com/google/pprof/profile.s 155233 155173 -60 -0.039% 
cmd/vendor/golang.org/x/tools/go/analysis/passes/asmdecl.s 29779 29182 -597 -2.005% cmd/link/internal/loader.s 91163 91116 -47 -0.052% cmd/compile/internal/escape [cmd/compile].s 86596 86592 -4 -0.005% cmd/go/internal/fips140.s 5054 5043 -11 -0.218% cmd/vendor/github.com/google/pprof/internal/symbolz.s 4726 4674 -52 -1.100% cmd/vendor/golang.org/x/tools/go/analysis/passes/framepointer.s 3703 3660 -43 -1.161% cmd/vendor/golang.org/x/tools/go/analysis/passes/loopclosure.s 6869 6844 -25 -0.364% cmd/vendor/golang.org/x/text/language.s 46461 46443 -18 -0.039% cmd/vendor/golang.org/x/text/internal.s 2514 2502 -12 -0.477% cmd/trace.s 259231 258558 -673 -0.260% cmd/vendor/golang.org/x/tools/go/analysis/passes/ctrlflow.s 3996 3987 -9 -0.225% cmd/vendor/golang.org/x/tools/go/analysis/passes/testinggoroutine.s 9587 9555 -32 -0.334% cmd/vendor/golang.org/x/text/cases.s 38482 38463 -19 -0.049% cmd/vendor/golang.org/x/tools/go/analysis/passes/unsafeptr.s 3150 3124 -26 -0.825% cmd/vendor/github.com/ianlancetaylor/demangle.s 302482 302444 -38 -0.013% cmd/vendor/github.com/google/pprof/internal/report.s 90542 90448 -94 -0.104% cmd/vendor/github.com/google/pprof/internal/binutils.s 37434 37280 -154 -0.411% cmd/vendor/rsc.io/markdown.s 114203 114108 -95 -0.083% cmd/go/internal/modload.s 359362 358159 -1203 -0.335% cmd/vendor/golang.org/x/build/relnote.s 31599 31577 -22 -0.070% cmd/vendor/github.com/google/pprof/internal/driver.s 178419 177787 -632 -0.354% cmd/go/internal/load.s 186922 186634 -288 -0.154% cmd/link/internal/ld.s 643871 643415 -456 -0.071% cmd/link/internal/riscv64.s 19743 19726 -17 -0.086% cmd/go/internal/work.s 348917 348463 -454 -0.130% cmd/go/internal/clean.s 14815 14755 -60 -0.405% cmd/go/internal/list.s 29662 29630 -32 -0.108% cmd/go/internal/tool.s 27842 27825 -17 -0.061% cmd/go/internal/envcmd.s 49896 49872 -24 -0.048% cmd/go/internal/test.s 72162 72098 -64 -0.089% cmd/go/internal/bug.s 10603 10547 -56 -0.528% cmd/go/internal/toolchain.s 29042 28890 -152 -0.523% 
cmd/go/internal/modcmd.s 61916 61761 -155 -0.250% cmd/go/internal/modget.s 79559 79358 -201 -0.253% cmd/go/internal/workcmd.s 28612 28481 -131 -0.458% cmd/go.s 13367 13343 -24 -0.180% cmd/compile/internal/ssa.s 3614387 3614418 +31 +0.001% cmd/compile/internal/liveness.s 96901 96882 -19 -0.020% cmd/compile/internal/ssa [cmd/compile].s 3779800 3779973 +173 +0.005% cmd/compile/internal/liveness [cmd/compile].s 129898 129895 -3 -0.002% cmd/compile/internal/ssagen.s 436780 436748 -32 -0.007% cmd/compile/internal/ssagen [cmd/compile].s 473190 473109 -81 -0.017% cmd/compile/internal/walk [cmd/compile].s 347940 347810 -130 -0.037% cmd/compile/internal/walk.s 334528 334382 -146 -0.044% cmd/compile/internal/noder.s 260365 260297 -68 -0.026% cmd/compile/internal/noder [cmd/compile].s 296865 296819 -46 -0.015% cmd/compile/internal/gc.s 30442 30346 -96 -0.315% cmd/compile/internal/gc [cmd/compile].s 41682 41586 -96 -0.230% total 38124617 38101256 -23361 -0.061% Change-Id: Id0b3770da69c6f666b3ff36741f75377001466c0 Reviewed-on: https://go-review.googlesource.com/c/go/+/675335 Auto-Submit: Keith Randall Reviewed-by: Mark Freeman Reviewed-by: Keith Randall Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- diff --git a/src/cmd/compile/internal/ssa/deadstore.go b/src/cmd/compile/internal/ssa/deadstore.go index cdf290e2aa..17a0809cb7 100644 --- a/src/cmd/compile/internal/ssa/deadstore.go +++ b/src/cmd/compile/internal/ssa/deadstore.go @@ -10,6 +10,10 @@ import ( "cmd/internal/obj" ) +// maxShadowRanges bounds the number of disjoint byte intervals +// we track per pointer to avoid quadratic behaviour. +const maxShadowRanges = 64 + // dse does dead-store elimination on the Function. // Dead stores are those which are unconditionally followed by // another store to the same location, with no intervening load. 
@@ -24,6 +28,10 @@ func dse(f *Func) { defer f.retSparseMap(shadowed) // localAddrs maps from a local variable (the Aux field of a LocalAddr value) to an instance of a LocalAddr value for that variable in the current block. localAddrs := map[any]*Value{} + + // shadowedRanges stores the actual range data. The 'shadowed' sparseMap stores a 1-based index into this slice. + var shadowedRanges []*shadowRanges + for _, b := range f.Blocks { // Find all the stores in this block. Categorize their uses: // loadUse contains stores which are used by a subsequent load. @@ -89,10 +97,11 @@ func dse(f *Func) { // Walk backwards looking for dead stores. Keep track of shadowed addresses. // A "shadowed address" is a pointer, offset, and size describing a memory region that // is known to be written. We keep track of shadowed addresses in the shadowed map, - // mapping the ID of the address to a shadowRange where future writes will happen. + // mapping the ID of the address to a shadowRanges where future writes will happen. // Since we're walking backwards, writes to a shadowed region are useless, // as they will be immediately overwritten. shadowed.clear() + shadowedRanges = shadowedRanges[:0] v := last walkloop: @@ -100,6 +109,7 @@ func dse(f *Func) { // Someone might be reading this memory state. // Clear all shadowed addresses. shadowed.clear() + shadowedRanges = shadowedRanges[:0] } if v.Op == OpStore || v.Op == OpZero { ptr := v.Args[0] @@ -119,9 +129,14 @@ func dse(f *Func) { ptr = la } } - srNum, _ := shadowed.get(ptr.ID) - sr := shadowRange(srNum) - if sr.contains(off, off+sz) { + var si *shadowRanges + idx, ok := shadowed.get(ptr.ID) + if ok { + // The sparseMap stores a 1-based index, so we subtract 1. + si = shadowedRanges[idx-1] + } + + if si != nil && si.contains(off, off+sz) { // Modify the store/zero into a copy of the memory state, // effectively eliding the store operation. 
if v.Op == OpStore { @@ -136,7 +151,13 @@ func dse(f *Func) { v.Op = OpCopy } else { // Extend shadowed region. - shadowed.set(ptr.ID, int32(sr.merge(off, off+sz))) + if si == nil { + si = &shadowRanges{} + shadowedRanges = append(shadowedRanges, si) + // Store a 1-based index in the sparseMap. + shadowed.set(ptr.ID, int32(len(shadowedRanges))) + } + si.add(off, off+sz) } } // walk to previous store @@ -156,46 +177,51 @@ func dse(f *Func) { } } -// A shadowRange encodes a set of byte offsets [lo():hi()] from -// a given pointer that will be written to later in the block. -// A zero shadowRange encodes an empty shadowed range. -type shadowRange int32 - -func (sr shadowRange) lo() int64 { - return int64(sr & 0xffff) +// shadowRange represents a single byte range [lo,hi] that will be written. +type shadowRange struct { + lo, hi uint16 } -func (sr shadowRange) hi() int64 { - return int64((sr >> 16) & 0xffff) +// shadowRanges stores an unordered collection of disjoint byte ranges. +type shadowRanges struct { + ranges []shadowRange } // contains reports whether [lo:hi] is completely within sr. -func (sr shadowRange) contains(lo, hi int64) bool { - return lo >= sr.lo() && hi <= sr.hi() +func (sr *shadowRanges) contains(lo, hi int64) bool { + for _, r := range sr.ranges { + if lo >= int64(r.lo) && hi <= int64(r.hi) { + return true + } + } + return false } -// merge returns the union of sr and [lo:hi]. -// merge is allowed to return something smaller than the union. -func (sr shadowRange) merge(lo, hi int64) shadowRange { - if lo < 0 || hi > 0xffff { - // Ignore offsets that are too large or small. - return sr - } - if sr.lo() == sr.hi() { - // Old range is empty - use new one. - return shadowRange(lo + hi<<16) +func (sr *shadowRanges) add(lo, hi int64) { + // Ignore the store if: + // - the range doesn't fit in 16 bits, or + // - we already track maxShadowRanges intervals. + // The cap prevents a theoretical O(n^2) blow-up. 
+ if lo < 0 || hi > 0xffff || len(sr.ranges) >= maxShadowRanges { + return } - if hi < sr.lo() || lo > sr.hi() { - // The two regions don't overlap or abut, so we would - // have to keep track of multiple disjoint ranges. - // Because we can only keep one, keep the larger one. - if sr.hi()-sr.lo() >= hi-lo { - return sr + nlo := lo + nhi := hi + out := sr.ranges[:0] + + for _, r := range sr.ranges { + if nhi < int64(r.lo) || nlo > int64(r.hi) { + out = append(out, r) + continue + } + if int64(r.lo) < nlo { + nlo = int64(r.lo) + } + if int64(r.hi) > nhi { + nhi = int64(r.hi) } - return shadowRange(lo + hi<<16) } - // Regions overlap or abut - compute the union. - return shadowRange(min(lo, sr.lo()) + max(hi, sr.hi())<<16) + sr.ranges = append(out, shadowRange{uint16(nlo), uint16(nhi)}) } // elimDeadAutosGeneric deletes autos that are never accessed. To achieve this diff --git a/src/cmd/compile/internal/ssa/deadstore_test.go b/src/cmd/compile/internal/ssa/deadstore_test.go index 4ccd6b8e91..7c7a4dacf0 100644 --- a/src/cmd/compile/internal/ssa/deadstore_test.go +++ b/src/cmd/compile/internal/ssa/deadstore_test.go @@ -7,6 +7,8 @@ package ssa import ( "cmd/compile/internal/types" "cmd/internal/src" + "fmt" + "sort" "testing" ) @@ -172,3 +174,335 @@ func TestDeadStoreSmallStructInit(t *testing.T) { t.Errorf("dead store not removed") } } + +func TestDeadStoreArrayGap(t *testing.T) { + c := testConfig(t) + ptr := c.config.Types.BytePtr + i64 := c.config.Types.Int64 + + typ := types.NewArray(i64, 5) + tmp := c.Temp(typ) + + fun := c.Fun("entry", + Bloc("entry", + Valu("start", OpInitMem, types.TypeMem, 0, nil), + Valu("sp", OpSP, c.config.Types.Uintptr, 0, nil), + + Valu("base", OpLocalAddr, ptr, 0, tmp, "sp", "start"), + + Valu("p0", OpOffPtr, ptr, 0, nil, "base"), + Valu("p1", OpOffPtr, ptr, 8, nil, "base"), + Valu("p2", OpOffPtr, ptr, 16, nil, "base"), + Valu("p3", OpOffPtr, ptr, 24, nil, "base"), + Valu("p4", OpOffPtr, ptr, 32, nil, "base"), + + Valu("one", OpConst64, 
i64, 1, nil), + Valu("seven", OpConst64, i64, 7, nil), + Valu("zero", OpConst64, i64, 0, nil), + + Valu("mem0", OpZero, types.TypeMem, 40, typ, "base", "start"), + + Valu("s0", OpStore, types.TypeMem, 0, i64, "p0", "one", "mem0"), + Valu("s1", OpStore, types.TypeMem, 0, i64, "p1", "seven", "s0"), + Valu("s2", OpStore, types.TypeMem, 0, i64, "p3", "one", "s1"), + Valu("s3", OpStore, types.TypeMem, 0, i64, "p4", "one", "s2"), + Valu("s4", OpStore, types.TypeMem, 0, i64, "p2", "zero", "s3"), + + Goto("exit")), + Bloc("exit", + Exit("s4"))) + + CheckFunc(fun.f) + dse(fun.f) + CheckFunc(fun.f) + + if op := fun.values["mem0"].Op; op != OpCopy { + t.Fatalf("dead Zero not removed: got %s, want OpCopy", op) + } +} + +func TestShadowRanges(t *testing.T) { + t.Run("simple insert & contains", func(t *testing.T) { + var sr shadowRanges + sr.add(10, 20) + + wantRanges(t, sr.ranges, [][2]uint16{{10, 20}}) + if !sr.contains(12, 18) || !sr.contains(10, 20) { + t.Fatalf("contains failed after simple add") + } + if sr.contains(9, 11) || sr.contains(11, 21) { + t.Fatalf("contains erroneously true for non-contained range") + } + }) + + t.Run("merge overlapping", func(t *testing.T) { + var sr shadowRanges + sr.add(10, 20) + sr.add(15, 25) + + wantRanges(t, sr.ranges, [][2]uint16{{10, 25}}) + if !sr.contains(13, 24) { + t.Fatalf("contains should be true after merge") + } + }) + + t.Run("merge touching boundary", func(t *testing.T) { + var sr shadowRanges + sr.add(100, 150) + // touches at 150 - should coalesce + sr.add(150, 180) + + wantRanges(t, sr.ranges, [][2]uint16{{100, 180}}) + }) + + t.Run("union across several ranges", func(t *testing.T) { + var sr shadowRanges + sr.add(10, 20) + sr.add(30, 40) + // bridges second, not first + sr.add(25, 35) + + wantRanges(t, sr.ranges, [][2]uint16{{10, 20}, {25, 40}}) + + // envelops everything + sr.add(5, 50) + wantRanges(t, sr.ranges, [][2]uint16{{5, 50}}) + }) + + t.Run("disjoint intervals stay separate", func(t *testing.T) { + var sr 
shadowRanges + sr.add(10, 20) + sr.add(22, 30) + + wantRanges(t, sr.ranges, [][2]uint16{{10, 20}, {22, 30}}) + // spans both + if sr.contains(15, 25) { + t.Fatalf("contains across two disjoint ranges should be false") + } + }) + + t.Run("large uint16 offsets still work", func(t *testing.T) { + var sr shadowRanges + sr.add(40000, 45000) + + if !sr.contains(42000, 43000) { + t.Fatalf("contains failed for large uint16 values") + } + }) + + t.Run("out-of-bounds inserts ignored", func(t *testing.T) { + var sr shadowRanges + sr.add(10, 20) + sr.add(-5, 5) + sr.add(70000, 70010) + + wantRanges(t, sr.ranges, [][2]uint16{{10, 20}}) + }) +} + +// canonicalise order for comparisons +func sortRanges(r []shadowRange) { + sort.Slice(r, func(i, j int) bool { return r[i].lo < r[j].lo }) +} + +// compare actual slice with expected pairs +func wantRanges(t *testing.T, got []shadowRange, want [][2]uint16) { + t.Helper() + sortRanges(got) + + if len(got) != len(want) { + t.Fatalf("len(ranges)=%d, want %d (got=%v)", len(got), len(want), got) + } + + for i, w := range want { + if got[i].lo != w[0] || got[i].hi != w[1] { + t.Fatalf("range %d = [%d,%d], want [%d,%d] (full=%v)", + i, got[i].lo, got[i].hi, w[0], w[1], got) + } + } +} + +func BenchmarkDeadStore(b *testing.B) { + cfg := testConfig(b) + ptr := cfg.config.Types.BytePtr + + f := cfg.Fun("entry", + Bloc("entry", + Valu("start", OpInitMem, types.TypeMem, 0, nil), + Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil), + Valu("v", OpConstBool, cfg.config.Types.Bool, 1, nil), + Valu("a1", OpAddr, ptr, 0, nil, "sb"), + Valu("a2", OpAddr, ptr, 0, nil, "sb"), + Valu("a3", OpAddr, ptr, 0, nil, "sb"), + Valu("z1", OpZero, types.TypeMem, 1, cfg.config.Types.Bool, "a3", "start"), + Valu("s1", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a1", "v", "z1"), + Valu("s2", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a2", "v", "s1"), + Valu("s3", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a1", "v", "s2"), + Valu("s4", OpStore, 
types.TypeMem, 0, cfg.config.Types.Bool, "a3", "v", "s3"), + Goto("exit")), + Bloc("exit", + Exit("s3"))) + + runBench(b, func() { + dse(f.f) + }) +} + +func BenchmarkDeadStorePhi(b *testing.B) { + cfg := testConfig(b) + ptr := cfg.config.Types.BytePtr + + f := cfg.Fun("entry", + Bloc("entry", + Valu("start", OpInitMem, types.TypeMem, 0, nil), + Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil), + Valu("v", OpConstBool, cfg.config.Types.Bool, 1, nil), + Valu("addr", OpAddr, ptr, 0, nil, "sb"), + Goto("loop")), + Bloc("loop", + Valu("phi", OpPhi, types.TypeMem, 0, nil, "start", "store"), + Valu("store", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "addr", "v", "phi"), + If("v", "loop", "exit")), + Bloc("exit", + Exit("store"))) + + runBench(b, func() { + dse(f.f) + }) +} + +func BenchmarkDeadStoreTypes(b *testing.B) { + cfg := testConfig(b) + + t1 := cfg.config.Types.UInt64.PtrTo() + t2 := cfg.config.Types.UInt32.PtrTo() + + f := cfg.Fun("entry", + Bloc("entry", + Valu("start", OpInitMem, types.TypeMem, 0, nil), + Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil), + Valu("v", OpConstBool, cfg.config.Types.Bool, 1, nil), + Valu("a1", OpAddr, t1, 0, nil, "sb"), + Valu("a2", OpAddr, t2, 0, nil, "sb"), + Valu("s1", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a1", "v", "start"), + Valu("s2", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a2", "v", "s1"), + Goto("exit")), + Bloc("exit", + Exit("s2"))) + cse(f.f) + + runBench(b, func() { + dse(f.f) + }) +} + +func BenchmarkDeadStoreUnsafe(b *testing.B) { + cfg := testConfig(b) + ptr := cfg.config.Types.UInt64.PtrTo() + f := cfg.Fun("entry", + Bloc("entry", + Valu("start", OpInitMem, types.TypeMem, 0, nil), + Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil), + Valu("v", OpConstBool, cfg.config.Types.Bool, 1, nil), + Valu("a1", OpAddr, ptr, 0, nil, "sb"), + Valu("s1", OpStore, types.TypeMem, 0, cfg.config.Types.Int64, "a1", "v", "start"), + Valu("s2", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, 
"a1", "v", "s1"), + Goto("exit")), + Bloc("exit", + Exit("s2"))) + cse(f.f) + runBench(b, func() { + dse(f.f) + }) +} + +func BenchmarkDeadStoreSmallStructInit(b *testing.B) { + cfg := testConfig(b) + ptr := cfg.config.Types.BytePtr + + typ := types.NewStruct([]*types.Field{ + types.NewField(src.NoXPos, &types.Sym{Name: "A"}, cfg.config.Types.Int), + types.NewField(src.NoXPos, &types.Sym{Name: "B"}, cfg.config.Types.Int), + }) + tmp := cfg.Temp(typ) + + f := cfg.Fun("entry", + Bloc("entry", + Valu("start", OpInitMem, types.TypeMem, 0, nil), + Valu("sp", OpSP, cfg.config.Types.Uintptr, 0, nil), + Valu("zero", OpConst64, cfg.config.Types.Int, 0, nil), + + Valu("v6", OpLocalAddr, ptr, 0, tmp, "sp", "start"), + Valu("v3", OpOffPtr, ptr, 8, nil, "v6"), + Valu("v22", OpOffPtr, ptr, 0, nil, "v6"), + Valu("s1", OpStore, types.TypeMem, 0, cfg.config.Types.Int, "v22", "zero", "start"), + Valu("s2", OpStore, types.TypeMem, 0, cfg.config.Types.Int, "v3", "zero", "s1"), + + Valu("v8", OpLocalAddr, ptr, 0, tmp, "sp", "s2"), + Valu("v23", OpOffPtr, ptr, 8, nil, "v8"), + Valu("v25", OpOffPtr, ptr, 0, nil, "v8"), + Valu("s3", OpStore, types.TypeMem, 0, cfg.config.Types.Int, "v25", "zero", "s2"), + Valu("s4", OpStore, types.TypeMem, 0, cfg.config.Types.Int, "v23", "zero", "s3"), + Goto("exit")), + Bloc("exit", + Exit("s4"))) + cse(f.f) + + runBench(b, func() { + dse(f.f) + }) +} + +func BenchmarkDeadStoreLargeBlock(b *testing.B) { + // create a very large block with many shadowed stores + const ( + addrCount = 128 + // first 7 are dead + storesPerAddr = 8 + ) + cfg := testConfig(b) + ptrType := cfg.config.Types.BytePtr + boolType := cfg.config.Types.Bool + + items := []interface{}{ + Valu("start", OpInitMem, types.TypeMem, 0, nil), + Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil), + Valu("v", OpConstBool, boolType, 1, nil), + } + + for i := 0; i < addrCount; i++ { + items = append(items, + Valu(fmt.Sprintf("addr%d", i), OpAddr, ptrType, 0, nil, "sb"), + ) + } + + prev := "start" 
+ for round := 0; round < storesPerAddr; round++ { + for i := 0; i < addrCount; i++ { + store := fmt.Sprintf("s_%03d_%d", i, round) + addr := fmt.Sprintf("addr%d", i) + items = append(items, + Valu(store, OpStore, types.TypeMem, 0, boolType, addr, "v", prev), + ) + prev = store + } + } + + items = append(items, Goto("exit")) + entryBlk := Bloc("entry", items...) + exitBlk := Bloc("exit", Exit(prev)) + + f := cfg.Fun("stress", entryBlk, exitBlk) + + runBench(b, func() { + dse(f.f) + }) +} + +func runBench(b *testing.B, build func()) { + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + build() + } +}