From: David Benjamin Date: Fri, 25 Dec 2020 17:02:55 +0000 (-0500) Subject: unicode: correctly handle negative runes X-Git-Tag: go1.17beta1~1406 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=3780529255;p=gostls13.git unicode: correctly handle negative runes Is and isExcludingLatin did not handle negative runes when dispatching to is16. TestNegativeRune covers this along with the existing uint32 casts in IsGraphic, etc. (For tests, I picked the smallest non-Latin-1 code point in each range.) Updates #43254 Change-Id: I17261b91f0d2b5b5125d19219411b45c480df74f Reviewed-on: https://go-review.googlesource.com/c/go/+/280493 Run-TryBot: Rob Pike TryBot-Result: Go Bot Reviewed-by: Rob Pike Trust: Emmanuel Odeke --- diff --git a/src/unicode/letter.go b/src/unicode/letter.go index a57566f0a5..268e457a87 100644 --- a/src/unicode/letter.go +++ b/src/unicode/letter.go @@ -154,7 +154,8 @@ func is32(ranges []Range32, r uint32) bool { // Is reports whether the rune is in the specified table of ranges. func Is(rangeTab *RangeTable, r rune) bool { r16 := rangeTab.R16 - if len(r16) > 0 && r <= rune(r16[len(r16)-1].Hi) { + // Compare as uint32 to correctly handle negative runes. + if len(r16) > 0 && uint32(r) <= uint32(r16[len(r16)-1].Hi) { return is16(r16, uint16(r)) } r32 := rangeTab.R32 @@ -166,7 +167,8 @@ func Is(rangeTab *RangeTable, r rune) bool { func isExcludingLatin(rangeTab *RangeTable, r rune) bool { r16 := rangeTab.R16 - if off := rangeTab.LatinOffset; len(r16) > off && r <= rune(r16[len(r16)-1].Hi) { + // Compare as uint32 to correctly handle negative runes. + if off := rangeTab.LatinOffset; len(r16) > off && uint32(r) <= uint32(r16[len(r16)-1].Hi) { return is16(r16[off:], uint16(r)) } r32 := rangeTab.R32 diff --git a/src/unicode/letter_test.go b/src/unicode/letter_test.go index 19ee535d57..a91e3a326f 100644 --- a/src/unicode/letter_test.go +++ b/src/unicode/letter_test.go @@ -563,3 +563,82 @@ func TestSpecialCaseNoMapping(t *testing.T) { t.Errorf("got %q; want %q", got, want) } } + +func TestNegativeRune(t *testing.T) { + // Issue 43254 + // These tests cover negative rune handling by testing values which, + // when cast to uint8 or uint16, look like a particular valid rune. + // This package has Latin-1-specific optimizations, so we test all of + // Latin-1 and representative non-Latin-1 values in the character + // categories covered by IsGraphic, etc. + nonLatin1 := []uint32{ + // Lu: LATIN CAPITAL LETTER A WITH MACRON + 0x0100, + // Ll: LATIN SMALL LETTER A WITH MACRON + 0x0101, + // Lt: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + 0x01C5, + // M: COMBINING GRAVE ACCENT + 0x0300, + // Nd: ARABIC-INDIC DIGIT ZERO + 0x0660, + // P: GREEK QUESTION MARK + 0x037E, + // S: MODIFIER LETTER LEFT ARROWHEAD + 0x02C2, + // Z: OGHAM SPACE MARK + 0x1680, + } + for i := 0; i < MaxLatin1+len(nonLatin1); i++ { + base := uint32(i) + if i >= MaxLatin1 { + base = nonLatin1[i-MaxLatin1] + } + + // Note r is negative, but uint8(r) == uint8(base) and + // uint16(r) == uint16(base). + r := rune(base - 1<<31) + if Is(Letter, r) { + t.Errorf("Is(Letter, 0x%x - 1<<31) = true, want false", base) + } + if IsControl(r) { + t.Errorf("IsControl(0x%x - 1<<31) = true, want false", base) + } + if IsDigit(r) { + t.Errorf("IsDigit(0x%x - 1<<31) = true, want false", base) + } + if IsGraphic(r) { + t.Errorf("IsGraphic(0x%x - 1<<31) = true, want false", base) + } + if IsLetter(r) { + t.Errorf("IsLetter(0x%x - 1<<31) = true, want false", base) + } + if IsLower(r) { + t.Errorf("IsLower(0x%x - 1<<31) = true, want false", base) + } + if IsMark(r) { + t.Errorf("IsMark(0x%x - 1<<31) = true, want false", base) + } + if IsNumber(r) { + t.Errorf("IsNumber(0x%x - 1<<31) = true, want false", base) + } + if IsPrint(r) { + t.Errorf("IsPrint(0x%x - 1<<31) = true, want false", base) + } + if IsPunct(r) { + t.Errorf("IsPunct(0x%x - 1<<31) = true, want false", base) + } + if IsSpace(r) { + t.Errorf("IsSpace(0x%x - 1<<31) = true, want false", base) + } + if IsSymbol(r) { + t.Errorf("IsSymbol(0x%x - 1<<31) = true, want false", base) + } + if IsTitle(r) { + t.Errorf("IsTitle(0x%x - 1<<31) = true, want false", base) + } + if IsUpper(r) { + t.Errorf("IsUpper(0x%x - 1<<31) = true, want false", base) + } + } +}