Cypherpunks repositories - gostls13.git/commitdiff
crypto/subtle,cmd/compile: add intrinsics for ConstantTimeSelect and *Eq
author: Jorropo <jorropo.pgm@gmail.com>
Sun, 26 Oct 2025 21:19:30 +0000 (22:19 +0100)
committer: Jorropo <jorropo.pgm@gmail.com>
Tue, 28 Oct 2025 06:29:19 +0000 (23:29 -0700)
Targeting crypto/subtle rather than
crypto/internal/fips140/subtle after discussion with Filippo.

goos: linux
goarch: amd64
pkg: crypto/subtle
cpu: AMD Ryzen 5 3600 6-Core Processor
                        │ /tmp/old.logs │            /tmp/new.logs             │
                        │    sec/op     │    sec/op     vs base                │
ConstantTimeSelect-12      0.5246n ± 1%   0.5217n ± 2%        ~ (p=0.118 n=10)
ConstantTimeByteEq-12      1.0415n ± 1%   0.5202n ± 2%  -50.05% (p=0.000 n=10)
ConstantTimeEq-12          0.7813n ± 2%   0.7819n ± 0%        ~ (p=0.897 n=10)
ConstantTimeLessOrEq-12    1.0415n ± 3%   0.7813n ± 1%  -24.98% (p=0.000 n=10)
geomean                    0.8166n        0.6381n       -21.86%

The last three will become 1 latency cycle (0.25ns) faster once #76066 is fixed.

The Select being that fast with the old code is really impressive.
I am pretty sure this happens because my CPU supports BMI1 and BMI2 and
has a fusing unit able to translate non-BMI code into BMI code.
This benchmark doesn't capture the cache gains from the shorter assembly.

It currently compiles as:
v17 = TESTQ <flags> v31 v31 // v != 0
v20 = CMOVQNE <int> v32 v33 v17 (y[int])

It is possible to remove the `TESTQ` at compile time by fusing it with the
compare in a pattern like this:
subtle.ConstantTimeSelect(subtle.ConstantTimeLessOrEq(left, right), right, left)

This saves 2 latency cycles (1 once #76066 is fixed).

Updates #76056

Change-Id: I61a1df99e97a1506f75dae13db529f43846d8f1e
Reviewed-on: https://go-review.googlesource.com/c/go/+/715045
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@google.com>
src/cmd/compile/internal/ssagen/intrinsics.go
src/cmd/compile/internal/ssagen/intrinsics_test.go
src/crypto/subtle/constant_time.go
src/crypto/subtle/constant_time_test.go

index e14db7b0a0c3e600d55ace16f0fd8f877e22ed7a..06887c934e7ba5b12badd874984dbe4d5efefdf5 100644 (file)
@@ -1602,6 +1602,36 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
                        return s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], out)
                },
                sys.AMD64)
+
+       /******** crypto/subtle ********/
+       // We implement a superset of the ConstantTimeSelect promise:
+       // ConstantTimeSelect returns x if v != 0 and y if v == 0.
+       add("crypto/subtle", "ConstantTimeSelect",
+               func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+                       v, x, y := args[0], args[1], args[2]
+
+                       var checkOp ssa.Op
+                       var zero *ssa.Value
+                       switch s.config.PtrSize {
+                       case 8:
+                               checkOp = ssa.OpNeq64
+                               zero = s.constInt64(types.Types[types.TINT], 0)
+                       case 4:
+                               checkOp = ssa.OpNeq32
+                               zero = s.constInt32(types.Types[types.TINT], 0)
+                       default:
+                               panic("unreachable")
+                       }
+                       check := s.newValue2(checkOp, types.Types[types.TBOOL], zero, v)
+
+                       return s.newValue3(ssa.OpCondSelect, types.Types[types.TINT], x, y, check)
+               },
+               sys.ArchAMD64, sys.ArchARM64, sys.ArchLoong64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchWasm) // all with CMOV support.
+       add("crypto/subtle", "constantTimeBoolToUint8",
+               func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+                       return s.newValue1(ssa.OpCvtBoolToUint8, types.Types[types.TUINT8], args[0])
+               },
+               all...)
 }
 
 // findIntrinsic returns a function which builds the SSA equivalent of the
index 0623c5f2098c4ebe5a09b2e0571f9e5ec821caa7..5a4e577fb6d5ded98f15d78aad39f81dafb74814 100644 (file)
@@ -41,6 +41,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"386", "math/bits", "TrailingZeros8"}:                             struct{}{},
        {"386", "runtime", "KeepAlive"}:                                    struct{}{},
        {"386", "runtime", "slicebytetostringtmp"}:                         struct{}{},
+       {"386", "crypto/subtle", "constantTimeBoolToUint8"}:                struct{}{},
        {"amd64", "internal/runtime/atomic", "And"}:                        struct{}{},
        {"amd64", "internal/runtime/atomic", "And32"}:                      struct{}{},
        {"amd64", "internal/runtime/atomic", "And64"}:                      struct{}{},
@@ -187,6 +188,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"amd64", "sync/atomic", "SwapUint32"}:                             struct{}{},
        {"amd64", "sync/atomic", "SwapUint64"}:                             struct{}{},
        {"amd64", "sync/atomic", "SwapUintptr"}:                            struct{}{},
+       {"amd64", "crypto/subtle", "ConstantTimeSelect"}:                   struct{}{},
+       {"amd64", "crypto/subtle", "constantTimeBoolToUint8"}:              struct{}{},
        {"arm", "internal/runtime/sys", "Bswap32"}:                         struct{}{},
        {"arm", "internal/runtime/sys", "Bswap64"}:                         struct{}{},
        {"arm", "internal/runtime/sys", "GetCallerPC"}:                     struct{}{},
@@ -214,6 +217,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"arm", "math/bits", "TrailingZeros8"}:                             struct{}{},
        {"arm", "runtime", "KeepAlive"}:                                    struct{}{},
        {"arm", "runtime", "slicebytetostringtmp"}:                         struct{}{},
+       {"arm", "crypto/subtle", "constantTimeBoolToUint8"}:                struct{}{},
        {"arm64", "internal/runtime/atomic", "And"}:                        struct{}{},
        {"arm64", "internal/runtime/atomic", "And32"}:                      struct{}{},
        {"arm64", "internal/runtime/atomic", "And64"}:                      struct{}{},
@@ -358,6 +362,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"arm64", "sync/atomic", "SwapUint32"}:                             struct{}{},
        {"arm64", "sync/atomic", "SwapUint64"}:                             struct{}{},
        {"arm64", "sync/atomic", "SwapUintptr"}:                            struct{}{},
+       {"arm64", "crypto/subtle", "ConstantTimeSelect"}:                   struct{}{},
+       {"arm64", "crypto/subtle", "constantTimeBoolToUint8"}:              struct{}{},
        {"loong64", "internal/runtime/atomic", "And"}:                      struct{}{},
        {"loong64", "internal/runtime/atomic", "And32"}:                    struct{}{},
        {"loong64", "internal/runtime/atomic", "And64"}:                    struct{}{},
@@ -504,6 +510,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"loong64", "sync/atomic", "SwapUint32"}:                           struct{}{},
        {"loong64", "sync/atomic", "SwapUint64"}:                           struct{}{},
        {"loong64", "sync/atomic", "SwapUintptr"}:                          struct{}{},
+       {"loong64", "crypto/subtle", "ConstantTimeSelect"}:                 struct{}{},
+       {"loong64", "crypto/subtle", "constantTimeBoolToUint8"}:            struct{}{},
        {"mips", "internal/runtime/atomic", "And"}:                         struct{}{},
        {"mips", "internal/runtime/atomic", "And8"}:                        struct{}{},
        {"mips", "internal/runtime/atomic", "Cas"}:                         struct{}{},
@@ -574,6 +582,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"mips", "sync/atomic", "SwapInt32"}:                               struct{}{},
        {"mips", "sync/atomic", "SwapUint32"}:                              struct{}{},
        {"mips", "sync/atomic", "SwapUintptr"}:                             struct{}{},
+       {"mips", "crypto/subtle", "constantTimeBoolToUint8"}:               struct{}{},
        {"mips64", "internal/runtime/atomic", "And"}:                       struct{}{},
        {"mips64", "internal/runtime/atomic", "And8"}:                      struct{}{},
        {"mips64", "internal/runtime/atomic", "Cas"}:                       struct{}{},
@@ -662,6 +671,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"mips64", "sync/atomic", "SwapUint32"}:                            struct{}{},
        {"mips64", "sync/atomic", "SwapUint64"}:                            struct{}{},
        {"mips64", "sync/atomic", "SwapUintptr"}:                           struct{}{},
+       {"mips64", "crypto/subtle", "constantTimeBoolToUint8"}:             struct{}{},
        {"mips64le", "internal/runtime/atomic", "And"}:                     struct{}{},
        {"mips64le", "internal/runtime/atomic", "And8"}:                    struct{}{},
        {"mips64le", "internal/runtime/atomic", "Cas"}:                     struct{}{},
@@ -750,6 +760,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"mips64le", "sync/atomic", "SwapUint32"}:                          struct{}{},
        {"mips64le", "sync/atomic", "SwapUint64"}:                          struct{}{},
        {"mips64le", "sync/atomic", "SwapUintptr"}:                         struct{}{},
+       {"mips64le", "crypto/subtle", "constantTimeBoolToUint8"}:           struct{}{},
        {"mipsle", "internal/runtime/atomic", "And"}:                       struct{}{},
        {"mipsle", "internal/runtime/atomic", "And8"}:                      struct{}{},
        {"mipsle", "internal/runtime/atomic", "Cas"}:                       struct{}{},
@@ -820,6 +831,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"mipsle", "sync/atomic", "SwapInt32"}:                             struct{}{},
        {"mipsle", "sync/atomic", "SwapUint32"}:                            struct{}{},
        {"mipsle", "sync/atomic", "SwapUintptr"}:                           struct{}{},
+       {"mipsle", "crypto/subtle", "constantTimeBoolToUint8"}:             struct{}{},
        {"ppc64", "internal/runtime/atomic", "And"}:                        struct{}{},
        {"ppc64", "internal/runtime/atomic", "And8"}:                       struct{}{},
        {"ppc64", "internal/runtime/atomic", "Cas"}:                        struct{}{},
@@ -944,6 +956,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"ppc64", "sync/atomic", "SwapUint32"}:                             struct{}{},
        {"ppc64", "sync/atomic", "SwapUint64"}:                             struct{}{},
        {"ppc64", "sync/atomic", "SwapUintptr"}:                            struct{}{},
+       {"ppc64", "crypto/subtle", "ConstantTimeSelect"}:                   struct{}{},
+       {"ppc64", "crypto/subtle", "constantTimeBoolToUint8"}:              struct{}{},
        {"ppc64le", "internal/runtime/atomic", "And"}:                      struct{}{},
        {"ppc64le", "internal/runtime/atomic", "And8"}:                     struct{}{},
        {"ppc64le", "internal/runtime/atomic", "Cas"}:                      struct{}{},
@@ -1068,6 +1082,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"ppc64le", "sync/atomic", "SwapUint32"}:                           struct{}{},
        {"ppc64le", "sync/atomic", "SwapUint64"}:                           struct{}{},
        {"ppc64le", "sync/atomic", "SwapUintptr"}:                          struct{}{},
+       {"ppc64le", "crypto/subtle", "ConstantTimeSelect"}:                 struct{}{},
+       {"ppc64le", "crypto/subtle", "constantTimeBoolToUint8"}:            struct{}{},
        {"riscv64", "internal/runtime/atomic", "And"}:                      struct{}{},
        {"riscv64", "internal/runtime/atomic", "And8"}:                     struct{}{},
        {"riscv64", "internal/runtime/atomic", "Cas"}:                      struct{}{},
@@ -1188,6 +1204,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"riscv64", "sync/atomic", "SwapUint32"}:                           struct{}{},
        {"riscv64", "sync/atomic", "SwapUint64"}:                           struct{}{},
        {"riscv64", "sync/atomic", "SwapUintptr"}:                          struct{}{},
+       {"riscv64", "crypto/subtle", "constantTimeBoolToUint8"}:            struct{}{},
        {"s390x", "internal/runtime/atomic", "And"}:                        struct{}{},
        {"s390x", "internal/runtime/atomic", "And8"}:                       struct{}{},
        {"s390x", "internal/runtime/atomic", "Cas"}:                        struct{}{},
@@ -1306,6 +1323,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"s390x", "sync/atomic", "SwapUint32"}:                             struct{}{},
        {"s390x", "sync/atomic", "SwapUint64"}:                             struct{}{},
        {"s390x", "sync/atomic", "SwapUintptr"}:                            struct{}{},
+       {"s390x", "crypto/subtle", "constantTimeBoolToUint8"}:              struct{}{},
        {"wasm", "internal/runtime/sys", "GetCallerPC"}:                    struct{}{},
        {"wasm", "internal/runtime/sys", "GetCallerSP"}:                    struct{}{},
        {"wasm", "internal/runtime/sys", "GetClosurePtr"}:                  struct{}{},
@@ -1341,6 +1359,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"wasm", "math/bits", "TrailingZeros8"}:                            struct{}{},
        {"wasm", "runtime", "KeepAlive"}:                                   struct{}{},
        {"wasm", "runtime", "slicebytetostringtmp"}:                        struct{}{},
+       {"wasm", "crypto/subtle", "ConstantTimeSelect"}:                    struct{}{},
+       {"wasm", "crypto/subtle", "constantTimeBoolToUint8"}:               struct{}{},
 }
 
 func TestIntrinsics(t *testing.T) {
index 22c1c64a0da9e012dcfc0bce18308746b3e652f4..8eeff3b629befb9ee91508de8d4849e2a28af31b 100644 (file)
@@ -13,34 +13,56 @@ import "crypto/internal/fips140/subtle"
 // is independent of the contents. If the lengths of x and y do not match it
 // returns 0 immediately.
 func ConstantTimeCompare(x, y []byte) int {
-       return subtle.ConstantTimeCompare(x, y)
+       if len(x) != len(y) {
+               return 0
+       }
+
+       var v byte
+
+       for i := 0; i < len(x); i++ {
+               v |= x[i] ^ y[i]
+       }
+
+       return ConstantTimeByteEq(v, 0)
 }
 
 // ConstantTimeSelect returns x if v == 1 and y if v == 0.
 // Its behavior is undefined if v takes any other value.
 func ConstantTimeSelect(v, x, y int) int {
-       return subtle.ConstantTimeSelect(v, x, y)
+       // This is intrinsicified on arches with CMOV.
+       // It implements the following superset behavior:
+       // ConstantTimeSelect returns x if v != 0 and y if v == 0.
+       // Do the same here to avoid non portable UB.
+       v = int(constantTimeBoolToUint8(v != 0))
+       return ^(v-1)&x | (v-1)&y
 }
 
 // ConstantTimeByteEq returns 1 if x == y and 0 otherwise.
 func ConstantTimeByteEq(x, y uint8) int {
-       return subtle.ConstantTimeByteEq(x, y)
+       return int(constantTimeBoolToUint8(x == y))
 }
 
 // ConstantTimeEq returns 1 if x == y and 0 otherwise.
 func ConstantTimeEq(x, y int32) int {
-       return subtle.ConstantTimeEq(x, y)
+       return int(constantTimeBoolToUint8(x == y))
 }
 
 // ConstantTimeCopy copies the contents of y into x (a slice of equal length)
 // if v == 1. If v == 0, x is left unchanged. Its behavior is undefined if v
 // takes any other value.
 func ConstantTimeCopy(v int, x, y []byte) {
+       // Forward this one since it gains nothing from compiler intrinsics.
        subtle.ConstantTimeCopy(v, x, y)
 }
 
 // ConstantTimeLessOrEq returns 1 if x <= y and 0 otherwise.
 // Its behavior is undefined if x or y are negative or > 2**31 - 1.
 func ConstantTimeLessOrEq(x, y int) int {
-       return subtle.ConstantTimeLessOrEq(x, y)
+       return int(constantTimeBoolToUint8(x <= y))
+}
+
+// constantTimeBoolToUint8 is a compiler intrinsic.
+// It returns 1 for true and 0 for false.
+func constantTimeBoolToUint8(b bool) uint8 {
+       panic("unreachable; must be intrinsicified")
 }
index c2ccd28ad70f5b8b4af665732770e6a4eb86446c..9db1140134fe23d2ed69843bcbdd255f1005c13b 100644 (file)
@@ -128,6 +128,17 @@ func TestConstantTimeLessOrEq(t *testing.T) {
 
 var benchmarkGlobal uint8
 
+func BenchmarkConstantTimeSelect(b *testing.B) {
+       x := int(benchmarkGlobal)
+       var y, z int
+
+       for range b.N {
+               y, z, x = ConstantTimeSelect(x, y, z), y, z
+       }
+
+       benchmarkGlobal = uint8(x)
+}
+
 func BenchmarkConstantTimeByteEq(b *testing.B) {
        var x, y uint8