diff options
author | David Benjamin <davidben@google.com> | 2020-12-25 12:02:55 -0500 |
---|---|---|
committer | Emmanuel Odeke <emmanuel@orijtech.com> | 2021-02-24 04:00:46 +0000 |
commit | 37805292550e7144200b09320ffb61f21d421f8d (patch) | |
tree | 6fa5fe849583a3bab0eaae772d74a84a4e79e888 /src/unicode | |
parent | 0694fb3d78f9ce2add154203dbd42a7a5a07c2da (diff) | |
download | go-git-37805292550e7144200b09320ffb61f21d421f8d.tar.gz |
unicode: correctly handle negative runes
Is and isExcludingLatin did not handle negative runes when dispatching
to is16. TestNegativeRune covers this along with the existing uint32
casts in IsGraphic, etc. (For tests, I picked the smallest non-Latin-1
code point in each range.)
Updates #43254
Change-Id: I17261b91f0d2b5b5125d19219411b45c480df74f
Reviewed-on: https://go-review.googlesource.com/c/go/+/280493
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Rob Pike <r@golang.org>
Trust: Emmanuel Odeke <emmanuel@orijtech.com>
Diffstat (limited to 'src/unicode')
-rw-r--r-- | src/unicode/letter.go | 6 | ||||
-rw-r--r-- | src/unicode/letter_test.go | 79 |
2 files changed, 83 insertions, 2 deletions
diff --git a/src/unicode/letter.go b/src/unicode/letter.go index a57566f0a5..268e457a87 100644 --- a/src/unicode/letter.go +++ b/src/unicode/letter.go @@ -154,7 +154,8 @@ func is32(ranges []Range32, r uint32) bool { // Is reports whether the rune is in the specified table of ranges. func Is(rangeTab *RangeTable, r rune) bool { r16 := rangeTab.R16 - if len(r16) > 0 && r <= rune(r16[len(r16)-1].Hi) { + // Compare as uint32 to correctly handle negative runes. + if len(r16) > 0 && uint32(r) <= uint32(r16[len(r16)-1].Hi) { return is16(r16, uint16(r)) } r32 := rangeTab.R32 @@ -166,7 +167,8 @@ func Is(rangeTab *RangeTable, r rune) bool { func isExcludingLatin(rangeTab *RangeTable, r rune) bool { r16 := rangeTab.R16 - if off := rangeTab.LatinOffset; len(r16) > off && r <= rune(r16[len(r16)-1].Hi) { + // Compare as uint32 to correctly handle negative runes. + if off := rangeTab.LatinOffset; len(r16) > off && uint32(r) <= uint32(r16[len(r16)-1].Hi) { return is16(r16[off:], uint16(r)) } r32 := rangeTab.R32 diff --git a/src/unicode/letter_test.go b/src/unicode/letter_test.go index 19ee535d57..a91e3a326f 100644 --- a/src/unicode/letter_test.go +++ b/src/unicode/letter_test.go @@ -563,3 +563,82 @@ func TestSpecialCaseNoMapping(t *testing.T) { t.Errorf("got %q; want %q", got, want) } } + +func TestNegativeRune(t *testing.T) { + // Issue 43254 + // These tests cover negative rune handling by testing values which, + // when cast to uint8 or uint16, look like a particular valid rune. + // This package has Latin-1-specific optimizations, so we test all of + // Latin-1 and representative non-Latin-1 values in the character + // categories covered by IsGraphic, etc. + nonLatin1 := []uint32{ + // Lu: LATIN CAPITAL LETTER A WITH MACRON + 0x0100, + // Ll: LATIN SMALL LETTER A WITH MACRON + 0x0101, + // Lt: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + 0x01C5, + // M: COMBINING GRAVE ACCENT + 0x0300, + // Nd: ARABIC-INDIC DIGIT ZERO + 0x0660, + // P: GREEK QUESTION MARK + 0x037E, + // S: MODIFIER LETTER LEFT ARROWHEAD + 0x02C2, + // Z: OGHAM SPACE MARK + 0x1680, + } + for i := 0; i < MaxLatin1+len(nonLatin1); i++ { + base := uint32(i) + if i >= MaxLatin1 { + base = nonLatin1[i-MaxLatin1] + } + + // Note r is negative, but uint8(r) == uint8(base) and + // uint16(r) == uint16(base). + r := rune(base - 1<<31) + if Is(Letter, r) { + t.Errorf("Is(Letter, 0x%x - 1<<31) = true, want false", base) + } + if IsControl(r) { + t.Errorf("IsControl(0x%x - 1<<31) = true, want false", base) + } + if IsDigit(r) { + t.Errorf("IsDigit(0x%x - 1<<31) = true, want false", base) + } + if IsGraphic(r) { + t.Errorf("IsGraphic(0x%x - 1<<31) = true, want false", base) + } + if IsLetter(r) { + t.Errorf("IsLetter(0x%x - 1<<31) = true, want false", base) + } + if IsLower(r) { + t.Errorf("IsLower(0x%x - 1<<31) = true, want false", base) + } + if IsMark(r) { + t.Errorf("IsMark(0x%x - 1<<31) = true, want false", base) + } + if IsNumber(r) { + t.Errorf("IsNumber(0x%x - 1<<31) = true, want false", base) + } + if IsPrint(r) { + t.Errorf("IsPrint(0x%x - 1<<31) = true, want false", base) + } + if IsPunct(r) { + t.Errorf("IsPunct(0x%x - 1<<31) = true, want false", base) + } + if IsSpace(r) { + t.Errorf("IsSpace(0x%x - 1<<31) = true, want false", base) + } + if IsSymbol(r) { + t.Errorf("IsSymbol(0x%x - 1<<31) = true, want false", base) + } + if IsTitle(r) { + t.Errorf("IsTitle(0x%x - 1<<31) = true, want false", base) + } + if IsUpper(r) { + t.Errorf("IsUpper(0x%x - 1<<31) = true, want false", base) + } + } +} |