]> Cypherpunks repositories - gostls13.git/commitdiff
crypto/x509,ecoding/asn1: better handling of weird encodings
authorRoland Shoemaker <roland@golang.org>
Fri, 21 Feb 2025 01:05:04 +0000 (17:05 -0800)
committerRoland Shoemaker <roland@golang.org>
Thu, 13 Mar 2025 23:42:59 +0000 (16:42 -0700)
For various cursed reasons we need to support the BMPString and
T61String ASN.1 string encodings. These types use the defunct UCS-2 and
T.61 character encodings respectively.

This change rejects some characters when decoding BMPStrings which are
not valid in UCS-2, and properly parses T61Strings instead of treating
them as plain UTF-8.

While still not perfect, this matches the behavior of most other
implementations, particularly BoringSSL. Ideally we'd just remove
support for these ASN.1 types (particularly in crypto/x509, where we
don't actually expose any API), but doing so is likely to break some
deploy certificates which unfortunately still use these types in DNs,
despite them being deprecated since 1999/2002.

Fixes #71862

Change-Id: Ib8f392656a35171e48eaf71a200be6d7605b2f02
Reviewed-on: https://go-review.googlesource.com/c/go/+/651275
Reviewed-by: Daniel McCarney <daniel@binaryparadox.net>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
src/crypto/x509/parser.go
src/crypto/x509/parser_test.go
src/encoding/asn1/asn1.go
src/encoding/asn1/asn1_test.go

index 6bea5cc77b7fc59a17fb4a77042338cbff2efee3..b99c776f09b93fa4fc3525e25356102bb92dfde3 100644 (file)
@@ -60,7 +60,21 @@ func isPrintable(b byte) bool {
 func parseASN1String(tag cryptobyte_asn1.Tag, value []byte) (string, error) {
        switch tag {
        case cryptobyte_asn1.T61String:
-               return string(value), nil
+               // T.61 is a defunct ITU 8-bit character encoding which preceded Unicode.
+               // T.61 uses a code page layout that _almost_ exactly maps to the code
+               // page layout of the ISO 8859-1 (Latin-1) character encoding, with the
+               // exception that a number of characters in Latin-1 are not present
+               // in T.61.
+               //
+               // Instead of mapping which characters are present in Latin-1 but not T.61,
+               // we just treat these strings as being encoded using Latin-1. This matches
+               // what most of the world does, including BoringSSL.
+               buf := make([]byte, 0, len(value))
+               for _, v := range value {
+                       // All the 1-byte UTF-8 runes map 1-1 with Latin-1.
+                       buf = utf8.AppendRune(buf, rune(v))
+               }
+               return string(buf), nil
        case cryptobyte_asn1.PrintableString:
                for _, b := range value {
                        if !isPrintable(b) {
@@ -74,6 +88,14 @@ func parseASN1String(tag cryptobyte_asn1.Tag, value []byte) (string, error) {
                }
                return string(value), nil
        case cryptobyte_asn1.Tag(asn1.TagBMPString):
+               // BMPString uses the defunct UCS-2 16-bit character encoding, which
+               // covers the Basic Multilingual Plane (BMP). UTF-16 was an extension of
+               // UCS-2, containing all of the same code points, but also including
+               // multi-code point characters (by using surrogate code points). We can
+               // treat a UCS-2 encoded string as a UTF-16 encoded string, as long as
+               // we reject out the UTF-16 specific code points. This matches the
+               // BoringSSL behavior.
+
                if len(value)%2 != 0 {
                        return "", errors.New("invalid BMPString")
                }
@@ -85,7 +107,16 @@ func parseASN1String(tag cryptobyte_asn1.Tag, value []byte) (string, error) {
 
                s := make([]uint16, 0, len(value)/2)
                for len(value) > 0 {
-                       s = append(s, uint16(value[0])<<8+uint16(value[1]))
+                       point := uint16(value[0])<<8 + uint16(value[1])
+                       // Reject UTF-16 code points that are permanently reserved
+                       // noncharacters (0xfffe, 0xffff, and 0xfdd0-0xfdef) and surrogates
+                       // (0xd800-0xdfff).
+                       if point == 0xfffe || point == 0xffff ||
+                               (point >= 0xfdd0 && point <= 0xfdef) ||
+                               (point >= 0xd800 && point <= 0xdfff) {
+                               return "", errors.New("invalid BMPString")
+                       }
+                       s = append(s, point)
                        value = value[2:]
                }
 
index 1ffc32daeffe21fcebc232f8d601d376ca0ce945..e7c1d87bfa8efe2a8f38f6da84082231b6d70e80 100644 (file)
@@ -24,8 +24,8 @@ func TestParseASN1String(t *testing.T) {
                {
                        name:     "T61String",
                        tag:      cryptobyte_asn1.T61String,
-                       value:    []byte{80, 81, 82},
-                       expected: string("PQR"),
+                       value:    []byte{0xbf, 0x61, 0x3f},
+                       expected: string("¿a?"),
                },
                {
                        name:     "PrintableString",
@@ -63,6 +63,30 @@ func TestParseASN1String(t *testing.T) {
                        value:       []byte{255},
                        expectedErr: "invalid BMPString",
                },
+               {
+                       name:        "BMPString (invalid surrogate)",
+                       tag:         cryptobyte_asn1.Tag(asn1.TagBMPString),
+                       value:       []byte{80, 81, 216, 1},
+                       expectedErr: "invalid BMPString",
+               },
+               {
+                       name:        "BMPString (invalid noncharacter 0xfdd1)",
+                       tag:         cryptobyte_asn1.Tag(asn1.TagBMPString),
+                       value:       []byte{80, 81, 253, 209},
+                       expectedErr: "invalid BMPString",
+               },
+               {
+                       name:        "BMPString (invalid noncharacter 0xffff)",
+                       tag:         cryptobyte_asn1.Tag(asn1.TagBMPString),
+                       value:       []byte{80, 81, 255, 255},
+                       expectedErr: "invalid BMPString",
+               },
+               {
+                       name:        "BMPString (invalid noncharacter 0xfffe)",
+                       tag:         cryptobyte_asn1.Tag(asn1.TagBMPString),
+                       value:       []byte{80, 81, 255, 254},
+                       expectedErr: "invalid BMPString",
+               },
                {
                        name:     "IA5String",
                        tag:      cryptobyte_asn1.IA5String,
index 488fb9b1e096f10be13d8fae567247799b78f519..4e3f85de1335a39b8b05017e63e163896ffbd7c8 100644 (file)
@@ -463,7 +463,21 @@ func parseIA5String(bytes []byte) (ret string, err error) {
 // parseT61String parses an ASN.1 T61String (8-bit clean string) from the given
 // byte slice and returns it.
 func parseT61String(bytes []byte) (ret string, err error) {
-       return string(bytes), nil
+       // T.61 is a defunct ITU 8-bit character encoding which preceded Unicode.
+       // T.61 uses a code page layout that _almost_ exactly maps to the code
+       // page layout of the ISO 8859-1 (Latin-1) character encoding, with the
+       // exception that a number of characters in Latin-1 are not present
+       // in T.61.
+       //
+       // Instead of mapping which characters are present in Latin-1 but not T.61,
+       // we just treat these strings as being encoded using Latin-1. This matches
+       // what most of the world does, including BoringSSL.
+       buf := make([]byte, 0, len(bytes))
+       for _, v := range bytes {
+               // All the 1-byte UTF-8 runes map 1-1 with Latin-1.
+               buf = utf8.AppendRune(buf, rune(v))
+       }
+       return string(buf), nil
 }
 
 // UTF8String
@@ -482,8 +496,16 @@ func parseUTF8String(bytes []byte) (ret string, err error) {
 // parseBMPString parses an ASN.1 BMPString (Basic Multilingual Plane of
 // ISO/IEC/ITU 10646-1) from the given byte slice and returns it.
 func parseBMPString(bmpString []byte) (string, error) {
+       // BMPString uses the defunct UCS-2 16-bit character encoding, which
+       // covers the Basic Multilingual Plane (BMP). UTF-16 was an extension of
+       // UCS-2, containing all of the same code points, but also including
+       // multi-code point characters (by using surrogate code points). We can
+       // treat a UCS-2 encoded string as a UTF-16 encoded string, as long as
+       // we reject out the UTF-16 specific code points. This matches the
+       // BoringSSL behavior.
+
        if len(bmpString)%2 != 0 {
-               return "", errors.New("pkcs12: odd-length BMP string")
+               return "", errors.New("invalid BMPString")
        }
 
        // Strip terminator if present.
@@ -493,7 +515,16 @@ func parseBMPString(bmpString []byte) (string, error) {
 
        s := make([]uint16, 0, len(bmpString)/2)
        for len(bmpString) > 0 {
-               s = append(s, uint16(bmpString[0])<<8+uint16(bmpString[1]))
+               point := uint16(bmpString[0])<<8 + uint16(bmpString[1])
+               // Reject UTF-16 code points that are permanently reserved
+               // noncharacters (0xfffe, 0xffff, and 0xfdd0-0xfdef) and surrogates
+               // (0xd800-0xdfff).
+               if point == 0xfffe || point == 0xffff ||
+                       (point >= 0xfdd0 && point <= 0xfdef) ||
+                       (point >= 0xd800 && point <= 0xdfff) {
+                       return "", errors.New("invalid BMPString")
+               }
+               s = append(s, point)
                bmpString = bmpString[2:]
        }
 
index 9a605e245c13698f324281ba3fdec9dffdba8184..60dae71df471c4fa7de27446f05aa88f9671355a 100644 (file)
@@ -506,6 +506,7 @@ var unmarshalTestData = []struct {
        {[]byte{0x30, 0x05, 0x02, 0x03, 0x12, 0x34, 0x56}, &TestBigInt{big.NewInt(0x123456)}},
        {[]byte{0x30, 0x0b, 0x31, 0x09, 0x02, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x01, 0x03}, &TestSet{Ints: []int{1, 2, 3}}},
        {[]byte{0x12, 0x0b, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' '}, newString("0123456789 ")},
+       {[]byte{0x14, 0x03, 0xbf, 0x61, 0x3f}, newString("¿a?")},
 }
 
 func TestUnmarshal(t *testing.T) {
@@ -1126,34 +1127,43 @@ func TestTaggedRawValue(t *testing.T) {
 }
 
 var bmpStringTests = []struct {
+       name       string
        decoded    string
        encodedHex string
+       invalid    bool
 }{
-       {"", "0000"},
+       {"empty string", "", "0000", false},
        // Example from https://tools.ietf.org/html/rfc7292#appendix-B.
-       {"Beavis", "0042006500610076006900730000"},
+       {"rfc7292 example", "Beavis", "0042006500610076006900730000", false},
        // Some characters from the "Letterlike Symbols Unicode block".
-       {"\u2115 - Double-struck N", "21150020002d00200044006f00750062006c0065002d00730074007200750063006b0020004e0000"},
+       {"letterlike symbols", "\u2115 - Double-struck N", "21150020002d00200044006f00750062006c0065002d00730074007200750063006b0020004e0000", false},
+       {"invalid length", "", "ff", true},
+       {"invalid surrogate", "", "5051d801", true},
+       {"invalid noncharacter 0xfdd1", "", "5051fdd1", true},
+       {"invalid noncharacter 0xffff", "", "5051ffff", true},
+       {"invalid noncharacter 0xfffe", "", "5051fffe", true},
 }
 
 func TestBMPString(t *testing.T) {
-       for i, test := range bmpStringTests {
-               encoded, err := hex.DecodeString(test.encodedHex)
-               if err != nil {
-                       t.Fatalf("#%d: failed to decode from hex string", i)
-               }
+       for _, test := range bmpStringTests {
+               t.Run(test.name, func(t *testing.T) {
+                       encoded, err := hex.DecodeString(test.encodedHex)
+                       if err != nil {
+                               t.Fatalf("failed to decode from hex string: %s", err)
+                       }
 
-               decoded, err := parseBMPString(encoded)
+                       decoded, err := parseBMPString(encoded)
 
-               if err != nil {
-                       t.Errorf("#%d: decoding output gave an error: %s", i, err)
-                       continue
-               }
+                       if err != nil && !test.invalid {
+                               t.Errorf("parseBMPString failed: %s", err)
+                       } else if test.invalid && err == nil {
+                               t.Error("parseBMPString didn't fail as expected")
+                       }
 
-               if decoded != test.decoded {
-                       t.Errorf("#%d: decoding output resulted in %q, but it should have been %q", i, decoded, test.decoded)
-                       continue
-               }
+                       if decoded != test.decoded {
+                               t.Errorf("parseBMPString(%q): got %q, want %q", encoded, decoded, test.decoded)
+                       }
+               })
        }
 }