summary refs log tree commit diff
path: root/src/unicode
diff options
context:
space:
mode:
author David Benjamin <davidben@google.com> 2020-12-25 12:02:55 -0500
committer Emmanuel Odeke <emmanuel@orijtech.com> 2021-02-24 04:00:46 +0000
commit 37805292550e7144200b09320ffb61f21d421f8d (patch)
tree 6fa5fe849583a3bab0eaae772d74a84a4e79e888 /src/unicode
parent 0694fb3d78f9ce2add154203dbd42a7a5a07c2da (diff)
download go-git-37805292550e7144200b09320ffb61f21d421f8d.tar.gz
unicode: correctly handle negative runes
Is and isExcludingLatin did not handle negative runes when dispatching to
is16. TestNegativeRune covers this along with the existing uint32 casts in
IsGraphic, etc. (For tests, I picked the smallest non-Latin-1 code point in
each range.)

Updates #43254

Change-Id: I17261b91f0d2b5b5125d19219411b45c480df74f
Reviewed-on: https://go-review.googlesource.com/c/go/+/280493
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Rob Pike <r@golang.org>
Trust: Emmanuel Odeke <emmanuel@orijtech.com>
Diffstat (limited to 'src/unicode')
-rw-r--r-- src/unicode/letter.go 6
-rw-r--r-- src/unicode/letter_test.go 79
2 files changed, 83 insertions, 2 deletions
diff --git a/src/unicode/letter.go b/src/unicode/letter.go
index a57566f0a5..268e457a87 100644
--- a/src/unicode/letter.go
+++ b/src/unicode/letter.go
@@ -154,7 +154,8 @@ func is32(ranges []Range32, r uint32) bool {
// Is reports whether the rune is in the specified table of ranges.
func Is(rangeTab *RangeTable, r rune) bool {
r16 := rangeTab.R16
- if len(r16) > 0 && r <= rune(r16[len(r16)-1].Hi) {
+ // Compare as uint32 to correctly handle negative runes.
+ if len(r16) > 0 && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
return is16(r16, uint16(r))
}
r32 := rangeTab.R32
@@ -166,7 +167,8 @@ func Is(rangeTab *RangeTable, r rune) bool {
func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
r16 := rangeTab.R16
- if off := rangeTab.LatinOffset; len(r16) > off && r <= rune(r16[len(r16)-1].Hi) {
+ // Compare as uint32 to correctly handle negative runes.
+ if off := rangeTab.LatinOffset; len(r16) > off && uint32(r) <= uint32(r16[len(r16)-1].Hi) {
return is16(r16[off:], uint16(r))
}
r32 := rangeTab.R32
diff --git a/src/unicode/letter_test.go b/src/unicode/letter_test.go
index 19ee535d57..a91e3a326f 100644
--- a/src/unicode/letter_test.go
+++ b/src/unicode/letter_test.go
@@ -563,3 +563,82 @@ func TestSpecialCaseNoMapping(t *testing.T) {
t.Errorf("got %q; want %q", got, want)
}
}
+
+func TestNegativeRune(t *testing.T) {
+	// Issue 43254
+	// These tests cover negative rune handling by testing values which,
+	// when converted to uint8 or uint16, look like a particular valid rune.
+	// This package has Latin-1-specific optimizations, so we test all of
+	// Latin-1 (U+0000 through MaxLatin1, inclusive) and representative
+	// non-Latin-1 values in the categories covered by IsGraphic, etc.
+	nonLatin1 := []uint32{
+		// Lu: LATIN CAPITAL LETTER A WITH MACRON
+		0x0100,
+		// Ll: LATIN SMALL LETTER A WITH MACRON
+		0x0101,
+		// Lt: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+		0x01C5,
+		// M: COMBINING GRAVE ACCENT
+		0x0300,
+		// Nd: ARABIC-INDIC DIGIT ZERO
+		0x0660,
+		// P: GREEK QUESTION MARK
+		0x037E,
+		// S: MODIFIER LETTER LEFT ARROWHEAD
+		0x02C2,
+		// Z: OGHAM SPACE MARK
+		0x1680,
+	}
+	for i := 0; i <= MaxLatin1+len(nonLatin1); i++ {
+		base := uint32(i)
+		if i > MaxLatin1 {
+			base = nonLatin1[i-MaxLatin1-1]
+		}
+
+		// Setting the top bit makes r negative while leaving the low bits
+		// alone: uint8(r) == uint8(base) and uint16(r) == uint16(base).
+		r := rune(base - 1<<31)
+		if Is(Letter, r) {
+			t.Errorf("Is(Letter, 0x%x - 1<<31) = true, want false", base)
+		}
+		if IsControl(r) {
+			t.Errorf("IsControl(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsDigit(r) {
+			t.Errorf("IsDigit(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsGraphic(r) {
+			t.Errorf("IsGraphic(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsLetter(r) {
+			t.Errorf("IsLetter(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsLower(r) {
+			t.Errorf("IsLower(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsMark(r) {
+			t.Errorf("IsMark(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsNumber(r) {
+			t.Errorf("IsNumber(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsPrint(r) {
+			t.Errorf("IsPrint(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsPunct(r) {
+			t.Errorf("IsPunct(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsSpace(r) {
+			t.Errorf("IsSpace(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsSymbol(r) {
+			t.Errorf("IsSymbol(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsTitle(r) {
+			t.Errorf("IsTitle(0x%x - 1<<31) = true, want false", base)
+		}
+		if IsUpper(r) {
+			t.Errorf("IsUpper(0x%x - 1<<31) = true, want false", base)
+		}
+	}
+}