Cypherpunks repositories - gostls13.git/commitdiff
runtime: add wasm bulk memory operations
author: Garet Halliday <me@garet.holiday>
author date: Sat, 22 Oct 2022 03:22:12 +0000 (22:22 -0500)
committer: Gopher Robot <gobot@golang.org>
commit date: Thu, 27 Oct 2022 10:37:01 +0000 (10:37 +0000)
The existing implementation uses loops to implement bulk memory
operations such as memcpy and memclr. Now that bulk memory operations
have been standardized and are implemented in all major browsers and
engines (see https://webassembly.org/roadmap/), we should use them
to improve performance.

Updates #28360

Change-Id: I28df0e0350287d5e7e1d1c09a4064ea1054e7575
Reviewed-on: https://go-review.googlesource.com/c/go/+/444935
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Richard Musiol <neelance@gmail.com>
Reviewed-by: David Chase <drchase@google.com>
Auto-Submit: Richard Musiol <neelance@gmail.com>
Reviewed-by: Richard Musiol <neelance@gmail.com>
15 files changed:
src/cmd/compile/internal/ir/symtab.go
src/cmd/compile/internal/ssa/_gen/Wasm.rules
src/cmd/compile/internal/ssa/_gen/WasmOps.go
src/cmd/compile/internal/ssa/rewriteWasm.go
src/cmd/compile/internal/ssagen/ssa.go
src/cmd/compile/internal/wasm/ssa.go
src/cmd/internal/obj/wasm/a.out.go
src/cmd/internal/obj/wasm/anames.go
src/cmd/internal/obj/wasm/wasmobj.go
src/cmd/link/internal/wasm/asm.go
src/runtime/asm_wasm.s
src/runtime/memclr_wasm.s
src/runtime/memmove_wasm.s
src/runtime/sys_wasm.go
src/runtime/sys_wasm.s

index 2c89e677b480fcf04d8d374b00cfb8b550d47a77..bde7a4cfe4b47cec2ed676a2efcb94d30e226b84 100644 (file)
@@ -58,10 +58,6 @@ var Syms struct {
        // Wasm
        WasmDiv *obj.LSym
        // Wasm
-       WasmMove *obj.LSym
-       // Wasm
-       WasmZero *obj.LSym
-       // Wasm
        WasmTruncS *obj.LSym
        // Wasm
        WasmTruncU *obj.LSym
index 9e683b116c1613423505e365cc7ffc3bfd071901..a9ed82e45669d54e450b4913598ada1b81c9929d 100644 (file)
        (I64Store [s-8] dst (I64Load [s-8] src mem)
                (I64Store dst (I64Load src mem) mem))
 
-// Adjust moves to be a multiple of 16 bytes.
-(Move [s] dst src mem)
-       && s > 16 && s%16 != 0 && s%16 <= 8 =>
-       (Move [s-s%16]
-               (OffPtr <dst.Type> dst [s%16])
-               (OffPtr <src.Type> src [s%16])
-               (I64Store dst (I64Load src mem) mem))
-(Move [s] dst src mem)
-       && s > 16 && s%16 != 0 && s%16 > 8 =>
-       (Move [s-s%16]
-               (OffPtr <dst.Type> dst [s%16])
-               (OffPtr <src.Type> src [s%16])
-               (I64Store [8] dst (I64Load [8] src mem)
-                       (I64Store dst (I64Load src mem) mem)))
-
 // Large copying uses helper.
-(Move [s] dst src mem) && s%8 == 0 && logLargeCopy(v, s) =>
-       (LoweredMove [s/8] dst src mem)
+(Move [s] dst src mem) && logLargeCopy(v, s) =>
+       (LoweredMove [s] dst src mem)
 
 // Lowering Zero instructions
 (Zero [0] _ mem) => mem
                (I64Store32 destptr (I64Const [0]) mem))
 
 // Strip off any fractional word zeroing.
-(Zero [s] destptr mem) && s%8 != 0 && s > 8 =>
+(Zero [s] destptr mem) && s%8 != 0 && s > 8 && s < 32 =>
        (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
                (I64Store destptr (I64Const [0]) mem))
 
                                (I64Store destptr (I64Const [0]) mem))))
 
 // Large zeroing uses helper.
-(Zero [s] destptr mem) && s%8 == 0 && s > 32 =>
-       (LoweredZero [s/8] destptr mem)
+(Zero [s] destptr mem) =>
+       (LoweredZero [s] destptr mem)
 
 // Lowering constants
 (Const64 ...) => (I64Const ...)
index 33529e729d37061e6527810e68d308dbc2cdd35f..cd127b5f06ce1f9b1e301fb4cb4aaadf4cf8aacf 100644 (file)
@@ -126,8 +126,8 @@ func init() {
                {name: "LoweredInterCall", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", call: true},          // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem
 
                {name: "LoweredAddr", argLength: 1, reg: gp11, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // returns base+aux+auxint, arg0=base
-               {name: "LoweredMove", argLength: 3, reg: regInfo{inputs: []regMask{gp, gp}}, aux: "Int64"},                // large move. arg0=dst, arg1=src, arg2=mem, auxint=len/8, returns mem
-               {name: "LoweredZero", argLength: 2, reg: regInfo{inputs: []regMask{gp}}, aux: "Int64"},                    // large zeroing. arg0=start, arg1=mem, auxint=len/8, returns mem
+               {name: "LoweredMove", argLength: 3, reg: regInfo{inputs: []regMask{gp, gp}}, aux: "Int64"},                // large move. arg0=dst, arg1=src, arg2=mem, auxint=len, returns mem
+               {name: "LoweredZero", argLength: 2, reg: regInfo{inputs: []regMask{gp}}, aux: "Int64"},                    // large zeroing. arg0=start, arg1=mem, auxint=len, returns mem
 
                {name: "LoweredGetClosurePtr", reg: gp01},                                                                          // returns wasm.REG_CTXT, the closure pointer
                {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},                                                   // returns the PC of the caller of the current function
index 5b7f4a8e452518427b84f71569f1aa01bd64b2b6..a5be7922a0ea8795a3a9dc383e637459ecfce038 100644 (file)
@@ -2141,76 +2141,18 @@ func rewriteValueWasm_OpMove(v *Value) bool {
                return true
        }
        // match: (Move [s] dst src mem)
-       // cond: s > 16 && s%16 != 0 && s%16 <= 8
-       // result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (I64Store dst (I64Load src mem) mem))
+       // cond: logLargeCopy(v, s)
+       // result: (LoweredMove [s] dst src mem)
        for {
                s := auxIntToInt64(v.AuxInt)
                dst := v_0
                src := v_1
                mem := v_2
-               if !(s > 16 && s%16 != 0 && s%16 <= 8) {
-                       break
-               }
-               v.reset(OpMove)
-               v.AuxInt = int64ToAuxInt(s - s%16)
-               v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
-               v0.AuxInt = int64ToAuxInt(s % 16)
-               v0.AddArg(dst)
-               v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
-               v1.AuxInt = int64ToAuxInt(s % 16)
-               v1.AddArg(src)
-               v2 := b.NewValue0(v.Pos, OpWasmI64Store, types.TypeMem)
-               v3 := b.NewValue0(v.Pos, OpWasmI64Load, typ.UInt64)
-               v3.AddArg2(src, mem)
-               v2.AddArg3(dst, v3, mem)
-               v.AddArg3(v0, v1, v2)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s > 16 && s%16 != 0 && s%16 > 8
-       // result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (I64Store [8] dst (I64Load [8] src mem) (I64Store dst (I64Load src mem) mem)))
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s > 16 && s%16 != 0 && s%16 > 8) {
-                       break
-               }
-               v.reset(OpMove)
-               v.AuxInt = int64ToAuxInt(s - s%16)
-               v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
-               v0.AuxInt = int64ToAuxInt(s % 16)
-               v0.AddArg(dst)
-               v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
-               v1.AuxInt = int64ToAuxInt(s % 16)
-               v1.AddArg(src)
-               v2 := b.NewValue0(v.Pos, OpWasmI64Store, types.TypeMem)
-               v2.AuxInt = int64ToAuxInt(8)
-               v3 := b.NewValue0(v.Pos, OpWasmI64Load, typ.UInt64)
-               v3.AuxInt = int64ToAuxInt(8)
-               v3.AddArg2(src, mem)
-               v4 := b.NewValue0(v.Pos, OpWasmI64Store, types.TypeMem)
-               v5 := b.NewValue0(v.Pos, OpWasmI64Load, typ.UInt64)
-               v5.AddArg2(src, mem)
-               v4.AddArg3(dst, v5, mem)
-               v2.AddArg3(dst, v3, v4)
-               v.AddArg3(v0, v1, v2)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s%8 == 0 && logLargeCopy(v, s)
-       // result: (LoweredMove [s/8] dst src mem)
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s%8 == 0 && logLargeCopy(v, s)) {
+               if !(logLargeCopy(v, s)) {
                        break
                }
                v.reset(OpWasmLoweredMove)
-               v.AuxInt = int64ToAuxInt(s / 8)
+               v.AuxInt = int64ToAuxInt(s)
                v.AddArg3(dst, src, mem)
                return true
        }
@@ -4656,13 +4598,13 @@ func rewriteValueWasm_OpZero(v *Value) bool {
                return true
        }
        // match: (Zero [s] destptr mem)
-       // cond: s%8 != 0 && s > 8
+       // cond: s%8 != 0 && s > 8 && s < 32
        // result: (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8]) (I64Store destptr (I64Const [0]) mem))
        for {
                s := auxIntToInt64(v.AuxInt)
                destptr := v_0
                mem := v_1
-               if !(s%8 != 0 && s > 8) {
+               if !(s%8 != 0 && s > 8 && s < 32) {
                        break
                }
                v.reset(OpZero)
@@ -4738,21 +4680,16 @@ func rewriteValueWasm_OpZero(v *Value) bool {
                return true
        }
        // match: (Zero [s] destptr mem)
-       // cond: s%8 == 0 && s > 32
-       // result: (LoweredZero [s/8] destptr mem)
+       // result: (LoweredZero [s] destptr mem)
        for {
                s := auxIntToInt64(v.AuxInt)
                destptr := v_0
                mem := v_1
-               if !(s%8 == 0 && s > 32) {
-                       break
-               }
                v.reset(OpWasmLoweredZero)
-               v.AuxInt = int64ToAuxInt(s / 8)
+               v.AuxInt = int64ToAuxInt(s)
                v.AddArg2(destptr, mem)
                return true
        }
-       return false
 }
 func rewriteValueWasm_OpZeroExt16to32(v *Value) bool {
        v_0 := v.Args[0]
index 871530d7d59b31bf5f7792562b126d71b4046946..7966cd486075cbb627975d6cf2e108f81e03a0f7 100644 (file)
@@ -206,8 +206,6 @@ func InitConfig() {
        }
 
        // Wasm (all asm funcs with special ABIs)
-       ir.Syms.WasmMove = typecheck.LookupRuntimeVar("wasmMove")
-       ir.Syms.WasmZero = typecheck.LookupRuntimeVar("wasmZero")
        ir.Syms.WasmDiv = typecheck.LookupRuntimeVar("wasmDiv")
        ir.Syms.WasmTruncS = typecheck.LookupRuntimeVar("wasmTruncS")
        ir.Syms.WasmTruncU = typecheck.LookupRuntimeVar("wasmTruncU")
index 765051c9445b293e84f372fcd890598c048f3ba3..27ba98c9cd0a6ffb50976384874507f5ba838ba8 100644 (file)
@@ -149,14 +149,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                getValue32(s, v.Args[0])
                getValue32(s, v.Args[1])
                i32Const(s, int32(v.AuxInt))
-               p := s.Prog(wasm.ACall)
-               p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: ir.Syms.WasmMove}
+               s.Prog(wasm.AMemoryCopy)
 
        case ssa.OpWasmLoweredZero:
                getValue32(s, v.Args[0])
+               i32Const(s, 0)
                i32Const(s, int32(v.AuxInt))
-               p := s.Prog(wasm.ACall)
-               p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: ir.Syms.WasmZero}
+               s.Prog(wasm.AMemoryFill)
 
        case ssa.OpWasmLoweredNilCheck:
                getValue64(s, v.Args[0])
index 72ecaa92860b067f9bdd4a715dfd1c9b430dce1a..83ce0a67385e707a0c2b89acbf71c0c68c121466 100644 (file)
@@ -231,6 +231,17 @@ const (
        AI64TruncSatF64S
        AI64TruncSatF64U
 
+       AMemoryInit
+       ADataDrop
+       AMemoryCopy
+       AMemoryFill
+       ATableInit
+       AElemDrop
+       ATableCopy
+       ATableGrow
+       ATableSize
+       ATableFill
+
        ALast // Sentinel: End of low-level WebAssembly instructions.
 
        ARESUMEPOINT
index 94123849ee030223abe391608776133749606b14..c9bc15d27007bce783c74527cfe044e3984416ef 100644 (file)
@@ -195,6 +195,16 @@ var Anames = []string{
        "I64TruncSatF32U",
        "I64TruncSatF64S",
        "I64TruncSatF64U",
+       "MemoryInit",
+       "DataDrop",
+       "MemoryCopy",
+       "MemoryFill",
+       "TableInit",
+       "ElemDrop",
+       "TableCopy",
+       "TableGrow",
+       "TableSize",
+       "TableFill",
        "Last",
        "RESUMEPOINT",
        "CALLNORESUME",
index 59b2e7bddc7bdadbfe0979b02bb9a44965639181..9b0aabe919538b68af5db6ea7a26f507ba2f3a18 100644 (file)
@@ -799,8 +799,6 @@ var notUsePC_B = map[string]bool{
        "wasm_export_resume":     true,
        "wasm_export_getsp":      true,
        "wasm_pc_f_loop":         true,
-       "runtime.wasmMove":       true,
-       "runtime.wasmZero":       true,
        "runtime.wasmDiv":        true,
        "runtime.wasmTruncS":     true,
        "runtime.wasmTruncU":     true,
@@ -844,7 +842,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
        // Some functions use a special calling convention.
        switch s.Name {
        case "_rt0_wasm_js", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp", "wasm_pc_f_loop",
-               "runtime.wasmMove", "runtime.wasmZero", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
+               "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
                varDecls = []*varDecl{}
                useAssemblyRegMap()
        case "memchr", "memcmp":
@@ -1088,7 +1086,11 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
                        writeUleb128(w, align(p.As))
                        writeUleb128(w, uint64(p.To.Offset))
 
-               case ACurrentMemory, AGrowMemory:
+               case ACurrentMemory, AGrowMemory, AMemoryFill:
+                       w.WriteByte(0x00)
+
+               case AMemoryCopy:
+                       w.WriteByte(0x00)
                        w.WriteByte(0x00)
 
                }
index 0aa065f99de64fc67c2aebf54a85a4c28f2b7d25..99018c807916bc5c39a59752bc0c91b9380a0d6b 100644 (file)
@@ -60,8 +60,6 @@ var wasmFuncTypes = map[string]*wasmFuncType{
        "wasm_export_resume":     {Params: []byte{}},                                         //
        "wasm_export_getsp":      {Results: []byte{I32}},                                     // sp
        "wasm_pc_f_loop":         {Params: []byte{}},                                         //
-       "runtime.wasmMove":       {Params: []byte{I32, I32, I32}},                            // dst, src, len
-       "runtime.wasmZero":       {Params: []byte{I32, I32}},                                 // ptr, len
        "runtime.wasmDiv":        {Params: []byte{I64, I64}, Results: []byte{I64}},           // x, y -> x/y
        "runtime.wasmTruncS":     {Params: []byte{F64}, Results: []byte{I64}},                // x -> int(x)
        "runtime.wasmTruncU":     {Params: []byte{F64}, Results: []byte{I64}},                // x -> uint(x)
index d885da6e70f4398d36e04b7a4ac25613d749048d..e075c7259885999212b4032f5ebb3753ad9a5fc3 100644 (file)
@@ -320,10 +320,8 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
                I64Load stackArgs+16(FP); \
                I32WrapI64; \
                I64Load stackArgsSize+24(FP); \
-               I64Const $3; \
-               I64ShrU; \
                I32WrapI64; \
-               Call runtime·wasmMove(SB); \
+               MemoryCopy; \
        End; \
        \
        MOVD f+8(FP), CTXT; \
index 5a053049f8154eaabe70d92280e7ec6c1edf48d1..19d08ffbee2ac9ba24fe283defb8fac66da9f81c 100644 (file)
@@ -11,29 +11,10 @@ TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT, $0-16
        MOVD ptr+0(FP), R0
        MOVD n+8(FP), R1
 
-loop:
-       Loop
-               Get R1
-               I64Eqz
-               If
-                       RET
-               End
-
-               Get R0
-               I32WrapI64
-               I64Const $0
-               I64Store8 $0
-
-               Get R0
-               I64Const $1
-               I64Add
-               Set R0
-
-               Get R1
-               I64Const $1
-               I64Sub
-               Set R1
-
-               Br loop
-       End
-       UNDEF
+       Get R0
+       I32WrapI64
+       I32Const $0
+       Get R1
+       I32WrapI64
+       MemoryFill
+       RET
index 8525fea35ef4d558d67cf478444f7991b40861fb..1be8487a9906312e623090bdcb528a537b65d1f9 100644 (file)
@@ -13,142 +13,10 @@ TEXT runtime·memmove(SB), NOSPLIT, $0-24
        MOVD n+16(FP), R2
 
        Get R0
+       I32WrapI64
        Get R1
-       I64LtU
-       If // forward
-exit_forward_64:
-               Block
-loop_forward_64:
-                       Loop
-                               Get R2
-                               I64Const $8
-                               I64LtU
-                               BrIf exit_forward_64
-
-                               MOVD 0(R1), 0(R0)
-
-                               Get R0
-                               I64Const $8
-                               I64Add
-                               Set R0
-
-                               Get R1
-                               I64Const $8
-                               I64Add
-                               Set R1
-
-                               Get R2
-                               I64Const $8
-                               I64Sub
-                               Set R2
-
-                               Br loop_forward_64
-                       End
-               End
-
-loop_forward_8:
-               Loop
-                       Get R2
-                       I64Eqz
-                       If
-                               RET
-                       End
-
-                       Get R0
-                       I32WrapI64
-                       I64Load8U (R1)
-                       I64Store8 $0
-
-                       Get R0
-                       I64Const $1
-                       I64Add
-                       Set R0
-
-                       Get R1
-                       I64Const $1
-                       I64Add
-                       Set R1
-
-                       Get R2
-                       I64Const $1
-                       I64Sub
-                       Set R2
-
-                       Br loop_forward_8
-               End
-
-       Else
-               // backward
-               Get R0
-               Get R2
-               I64Add
-               Set R0
-
-               Get R1
-               Get R2
-               I64Add
-               Set R1
-
-exit_backward_64:
-               Block
-loop_backward_64:
-                       Loop
-                               Get R2
-                               I64Const $8
-                               I64LtU
-                               BrIf exit_backward_64
-
-                               Get R0
-                               I64Const $8
-                               I64Sub
-                               Set R0
-
-                               Get R1
-                               I64Const $8
-                               I64Sub
-                               Set R1
-
-                               Get R2
-                               I64Const $8
-                               I64Sub
-                               Set R2
-
-                               MOVD 0(R1), 0(R0)
-
-                               Br loop_backward_64
-                       End
-               End
-
-loop_backward_8:
-               Loop
-                       Get R2
-                       I64Eqz
-                       If
-                               RET
-                       End
-
-                       Get R0
-                       I64Const $1
-                       I64Sub
-                       Set R0
-
-                       Get R1
-                       I64Const $1
-                       I64Sub
-                       Set R1
-
-                       Get R2
-                       I64Const $1
-                       I64Sub
-                       Set R2
-
-                       Get R0
-                       I32WrapI64
-                       I64Load8U (R1)
-                       I64Store8 $0
-
-                       Br loop_backward_8
-               End
-       End
-
-       UNDEF
+       I32WrapI64
+       Get R2
+       I32WrapI64
+       MemoryCopy
+       RET
index e6e7f471eea9f80c082977e1e5bb165728ac310b..bf5756984ae1d65f7df12a51c01b5aa88d1a1220 100644 (file)
@@ -16,10 +16,6 @@ type m0Stack struct {
 
 var wasmStack m0Stack
 
-func wasmMove()
-
-func wasmZero()
-
 func wasmDiv()
 
 func wasmTruncS()
index 164dd16ec9ff13a88a0abbf59bbc4e7803d0aca6..f706e00ab285b307a6efebead432a902540f6157 100644 (file)
@@ -4,73 +4,6 @@
 
 #include "textflag.h"
 
-TEXT runtime·wasmMove(SB), NOSPLIT, $0-0
-loop:
-       Loop
-               // *dst = *src
-               Get R0
-               Get R1
-               I64Load $0
-               I64Store $0
-
-               // n--
-               Get R2
-               I32Const $1
-               I32Sub
-               Tee R2
-
-               // n == 0
-               I32Eqz
-               If
-                       Return
-               End
-
-               // dst += 8
-               Get R0
-               I32Const $8
-               I32Add
-               Set R0
-
-               // src += 8
-               Get R1
-               I32Const $8
-               I32Add
-               Set R1
-
-               Br loop
-       End
-       UNDEF
-
-TEXT runtime·wasmZero(SB), NOSPLIT, $0-0
-loop:
-       Loop
-               // *dst = 0
-               Get R0
-               I64Const $0
-               I64Store $0
-
-               // n--
-               Get R1
-               I32Const $1
-               I32Sub
-               Tee R1
-
-               // n == 0
-               I32Eqz
-               If
-                       Return
-               End
-
-               // dst += 8
-               Get R0
-               I32Const $8
-               I32Add
-               Set R0
-
-               Br loop
-       End
-       UNDEF
-
 TEXT runtime·wasmDiv(SB), NOSPLIT, $0-0
        Get R0
        I64Const $-0x8000000000000000