summaryrefslogtreecommitdiff
path: root/src/unicode
diff options
context:
space:
mode:
authorEgon Elbre <egonelbre@gmail.com>2015-11-17 16:51:23 +0200
committerBrad Fitzpatrick <bradfitz@golang.org>2016-04-26 21:59:50 +0000
commite607abbfd6e0550c13f4fa7b666d033eb9b14759 (patch)
tree928cf2b0a6713490c0a8dcf9510cbc5b6c19a3f6 /src/unicode
parent6e4a8615f652a2020471622354be6d890404020c (diff)
downloadgo-git-e607abbfd6e0550c13f4fa7b666d033eb9b14759.tar.gz
unicode: improve SimpleFold performance for ascii
This change significantly speeds up case-insensitive regexp matching. benchmark old ns/op new ns/op delta BenchmarkMatchEasy0i_32-8 2690 1473 -45.24% BenchmarkMatchEasy0i_1K-8 80404 42269 -47.43% BenchmarkMatchEasy0i_32K-8 3272187 2076118 -36.55% BenchmarkMatchEasy0i_1M-8 104805990 66503805 -36.55% BenchmarkMatchEasy0i_32M-8 3360192200 2126121600 -36.73% benchmark old MB/s new MB/s speedup BenchmarkMatchEasy0i_32-8 11.90 21.72 1.83x BenchmarkMatchEasy0i_1K-8 12.74 24.23 1.90x BenchmarkMatchEasy0i_32K-8 10.01 15.78 1.58x BenchmarkMatchEasy0i_1M-8 10.00 15.77 1.58x BenchmarkMatchEasy0i_32M-8 9.99 15.78 1.58x Issue #13288 Change-Id: I94af7bb29e75d60b4f6ee760124867ab271b9642 Reviewed-on: https://go-review.googlesource.com/16943 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/unicode')
-rw-r--r--src/unicode/letter.go4
-rw-r--r--src/unicode/maketables.go20
-rw-r--r--src/unicode/tables.go131
3 files changed, 155 insertions, 0 deletions
diff --git a/src/unicode/letter.go b/src/unicode/letter.go
index ffa083eb57..8aec920d22 100644
--- a/src/unicode/letter.go
+++ b/src/unicode/letter.go
@@ -332,6 +332,10 @@ type foldPair struct {
// SimpleFold('1') = '1'
//
func SimpleFold(r rune) rune {
+ if int(r) < len(asciiFold) {
+ return rune(asciiFold[r])
+ }
+
// Consult caseOrbit table for special cases.
lo := 0
hi := len(caseOrbit)
diff --git a/src/unicode/maketables.go b/src/unicode/maketables.go
index 328c75ed63..f364515c90 100644
--- a/src/unicode/maketables.go
+++ b/src/unicode/maketables.go
@@ -1172,6 +1172,7 @@ func printCasefold() {
}
}
+ printAsciiFold()
printCaseOrbit()
// Tables of category and script folding exceptions: code points
@@ -1269,6 +1270,25 @@ var comment = map[string]string{
"// If there is no entry for a script name, there are no such points.\n",
}
+func printAsciiFold() {
+ printf("var asciiFold = [MaxASCII + 1]uint16{\n")
+ for i := rune(0); i <= unicode.MaxASCII; i++ {
+ c := chars[i]
+ f := c.caseOrbit
+ if f == 0 {
+ if c.lowerCase != i && c.lowerCase != 0 {
+ f = c.lowerCase
+ } else if c.upperCase != i && c.upperCase != 0 {
+ f = c.upperCase
+ } else {
+ f = i
+ }
+ }
+ printf("\t0x%04X,\n", f)
+ }
+ printf("}\n\n")
+}
+
func printCaseOrbit() {
if *test {
for j := range chars {
diff --git a/src/unicode/tables.go b/src/unicode/tables.go
index 8bb42062f9..c04d69a6ff 100644
--- a/src/unicode/tables.go
+++ b/src/unicode/tables.go
@@ -6834,6 +6834,137 @@ var properties = [MaxLatin1 + 1]uint8{
0xFF: pLl | pp, // 'ΓΏ'
}
+var asciiFold = [MaxASCII + 1]uint16{
+ 0x0000,
+ 0x0001,
+ 0x0002,
+ 0x0003,
+ 0x0004,
+ 0x0005,
+ 0x0006,
+ 0x0007,
+ 0x0008,
+ 0x0009,
+ 0x000A,
+ 0x000B,
+ 0x000C,
+ 0x000D,
+ 0x000E,
+ 0x000F,
+ 0x0010,
+ 0x0011,
+ 0x0012,
+ 0x0013,
+ 0x0014,
+ 0x0015,
+ 0x0016,
+ 0x0017,
+ 0x0018,
+ 0x0019,
+ 0x001A,
+ 0x001B,
+ 0x001C,
+ 0x001D,
+ 0x001E,
+ 0x001F,
+ 0x0020,
+ 0x0021,
+ 0x0022,
+ 0x0023,
+ 0x0024,
+ 0x0025,
+ 0x0026,
+ 0x0027,
+ 0x0028,
+ 0x0029,
+ 0x002A,
+ 0x002B,
+ 0x002C,
+ 0x002D,
+ 0x002E,
+ 0x002F,
+ 0x0030,
+ 0x0031,
+ 0x0032,
+ 0x0033,
+ 0x0034,
+ 0x0035,
+ 0x0036,
+ 0x0037,
+ 0x0038,
+ 0x0039,
+ 0x003A,
+ 0x003B,
+ 0x003C,
+ 0x003D,
+ 0x003E,
+ 0x003F,
+ 0x0040,
+ 0x0061,
+ 0x0062,
+ 0x0063,
+ 0x0064,
+ 0x0065,
+ 0x0066,
+ 0x0067,
+ 0x0068,
+ 0x0069,
+ 0x006A,
+ 0x006B,
+ 0x006C,
+ 0x006D,
+ 0x006E,
+ 0x006F,
+ 0x0070,
+ 0x0071,
+ 0x0072,
+ 0x0073,
+ 0x0074,
+ 0x0075,
+ 0x0076,
+ 0x0077,
+ 0x0078,
+ 0x0079,
+ 0x007A,
+ 0x005B,
+ 0x005C,
+ 0x005D,
+ 0x005E,
+ 0x005F,
+ 0x0060,
+ 0x0041,
+ 0x0042,
+ 0x0043,
+ 0x0044,
+ 0x0045,
+ 0x0046,
+ 0x0047,
+ 0x0048,
+ 0x0049,
+ 0x004A,
+ 0x212A,
+ 0x004C,
+ 0x004D,
+ 0x004E,
+ 0x004F,
+ 0x0050,
+ 0x0051,
+ 0x0052,
+ 0x017F,
+ 0x0054,
+ 0x0055,
+ 0x0056,
+ 0x0057,
+ 0x0058,
+ 0x0059,
+ 0x005A,
+ 0x007B,
+ 0x007C,
+ 0x007D,
+ 0x007E,
+ 0x007F,
+}
+
var caseOrbit = []foldPair{
{0x004B, 0x006B},
{0x0053, 0x0073},