diff options
Diffstat (limited to 'libgo/go/strings')
-rw-r--r-- | libgo/go/strings/example_test.go | 16 | ||||
-rw-r--r-- | libgo/go/strings/strings.go | 126 | ||||
-rw-r--r-- | libgo/go/strings/strings_test.go | 95 |
3 files changed, 211 insertions, 26 deletions
diff --git a/libgo/go/strings/example_test.go b/libgo/go/strings/example_test.go index 733caf5f2db..36e0a42fb03 100644 --- a/libgo/go/strings/example_test.go +++ b/libgo/go/strings/example_test.go @@ -179,3 +179,19 @@ func ExampleToLower() { fmt.Println(strings.ToLower("Gopher")) // Output: gopher } + +func ExampleTrimSuffix() { + var s = "Hello, goodbye, etc!" + s = strings.TrimSuffix(s, "goodbye, etc!") + s = strings.TrimSuffix(s, "planet") + fmt.Print(s, "world!") + // Output: Hello, world! +} + +func ExampleTrimPrefix() { + var s = "Goodbye,, world!" + s = strings.TrimPrefix(s, "Goodbye,") + s = strings.TrimPrefix(s, "Howdy,") + fmt.Print("Hello" + s) + // Output: Hello, world! +} diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go index b411ba5d8b3..986f6d61ebc 100644 --- a/libgo/go/strings/strings.go +++ b/libgo/go/strings/strings.go @@ -26,7 +26,11 @@ func explode(s string, n int) []string { i, cur := 0, 0 for ; i+1 < n; i++ { ch, size = utf8.DecodeRuneInString(s[cur:]) - a[i] = string(ch) + if ch == utf8.RuneError { + a[i] = string(utf8.RuneError) + } else { + a[i] = s[cur : cur+size] + } cur += size } // add the rest, if there is any @@ -36,27 +40,69 @@ func explode(s string, n int) []string { return a } +// primeRK is the prime base used in Rabin-Karp algorithm. +const primeRK = 16777619 + +// hashstr returns the hash and the appropriate multiplicative +// factor for use in Rabin-Karp algorithm. +func hashstr(sep string) (uint32, uint32) { + hash := uint32(0) + for i := 0; i < len(sep); i++ { + hash = hash*primeRK + uint32(sep[i]) + + } + var pow, sq uint32 = 1, primeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} + // Count counts the number of non-overlapping instances of sep in s. func Count(s, sep string) int { - if sep == "" { - return utf8.RuneCountInString(s) + 1 - } - c := sep[0] - l := len(sep) n := 0 - if l == 1 { + // special cases + switch { + case len(sep) == 0: + return utf8.RuneCountInString(s) + 1 + case len(sep) == 1: // special case worth making fast + c := sep[0] for i := 0; i < len(s); i++ { if s[i] == c { n++ } } return n + case len(sep) > len(s): + return 0 + case len(sep) == len(s): + if sep == s { + return 1 + } + return 0 + } + hashsep, pow := hashstr(sep) + h := uint32(0) + for i := 0; i < len(sep); i++ { + h = h*primeRK + uint32(s[i]) + } + lastmatch := 0 + if h == hashsep && s[:len(sep)] == sep { + n++ + lastmatch = len(sep) } - for i := 0; i+l <= len(s); i++ { - if s[i] == c && s[i:i+l] == sep { + for i := len(sep); i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-len(sep)]) + i++ + if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep { n++ - i += l - 1 + lastmatch = i } } return n @@ -80,11 +126,11 @@ func ContainsRune(s string, r rune) bool { // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. func Index(s, sep string) int { n := len(sep) - if n == 0 { + switch { + case n == 0: return 0 - } - c := sep[0] - if n == 1 { + case n == 1: + c := sep[0] // special case worth making fast for i := 0; i < len(s); i++ { if s[i] == c { @@ -92,11 +138,30 @@ func Index(s, sep string) int { } } return -1 + case n == len(s): + if sep == s { + return 0 + } + return -1 + case n > len(s): + return -1 } - // n > 1 - for i := 0; i+n <= len(s); i++ { - if s[i] == c && s[i:i+n] == sep { - return i + // Hash sep. + hashsep, pow := hashstr(sep) + var h uint32 + for i := 0; i < n; i++ { + h = h*primeRK + uint32(s[i]) + } + if h == hashsep && s[:n] == sep { + return 0 + } + for i := n; i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashsep && s[i-n:i] == sep { + return i - n } } return -1 @@ -244,7 +309,8 @@ func SplitAfter(s, sep string) []string { } // Fields splits the string s around each instance of one or more consecutive white space -// characters, returning an array of substrings of s or an empty list if s contains only white space. +// characters, as defined by unicode.IsSpace, returning an array of substrings of s or an +// empty list if s contains only white space. func Fields(s string) []string { return FieldsFunc(s, unicode.IsSpace) } @@ -426,10 +492,10 @@ func isSeparator(r rune) bool { return unicode.IsSpace(r) } -// BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly. - // Title returns a copy of the string s with all Unicode letters that begin words // mapped to their title case. +// +// BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly. func Title(s string) string { // Use a closure here to remember state. // Hackish but effective. Depends on Map scanning in order and calling @@ -558,6 +624,24 @@ func TrimSpace(s string) string { return TrimFunc(s, unicode.IsSpace) } +// TrimPrefix returns s without the provided leading prefix string. +// If s doesn't start with prefix, s is returned unchanged. +func TrimPrefix(s, prefix string) string { + if HasPrefix(s, prefix) { + return s[len(prefix):] + } + return s +} + +// TrimSuffix returns s without the provided trailing suffix string. +// If s doesn't end with suffix, s is returned unchanged. +func TrimSuffix(s, suffix string) string { + if HasSuffix(s, suffix) { + return s[:len(s)-len(suffix)] + } + return s +} + // Replace returns a copy of the string s with the first n // non-overlapping instances of old replaced by new. // If n < 0, there is no limit on the number of replacements. diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go index 7be41a8dcad..68b658ca466 100644 --- a/libgo/go/strings/strings_test.go +++ b/libgo/go/strings/strings_test.go @@ -496,8 +496,8 @@ func TestSpecialCase(t *testing.T) { func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) } var trimTests = []struct { - f string - in, cutset, out string + f string + in, arg, out string }{ {"Trim", "abba", "a", "bb"}, {"Trim", "abba", "ab", ""}, @@ -520,6 +520,10 @@ var trimTests = []struct { {"TrimRight", "", "123", ""}, {"TrimRight", "", "", ""}, {"TrimRight", "☺\xc0", "☺", "☺\xc0"}, + {"TrimPrefix", "aabb", "a", "abb"}, + {"TrimPrefix", "aabb", "b", "aabb"}, + {"TrimSuffix", "aabb", "a", "aabb"}, + {"TrimSuffix", "aabb", "b", "aab"}, } func TestTrim(t *testing.T) { @@ -533,12 +537,16 @@ func TestTrim(t *testing.T) { f = TrimLeft case "TrimRight": f = TrimRight + case "TrimPrefix": + f = TrimPrefix + case "TrimSuffix": + f = TrimSuffix default: t.Errorf("Undefined trim function %s", name) } - actual := f(tc.in, tc.cutset) + actual := f(tc.in, tc.arg) if actual != tc.out { - t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.cutset, actual, tc.out) + t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out) } } } @@ -959,7 +967,7 @@ var ContainsRuneTests = []struct { func TestContainsRune(t *testing.T) { for _, ct := range ContainsRuneTests { if ContainsRune(ct.str, ct.r) != ct.expected { - t.Errorf("ContainsRune(%s, %s) = %v, want %v", + t.Errorf("ContainsRune(%q, %q) = %v, want %v", ct.str, ct.r, !ct.expected, ct.expected) } } @@ -993,6 +1001,65 @@ func TestEqualFold(t *testing.T) { } } +func makeBenchInputHard() string { + tokens := [...]string{ + "<a>", "<p>", "<b>", "<strong>", + "</a>", "</p>", "</b>", "</strong>", + "hello", "world", + } + x := make([]byte, 0, 1<<20) + for len(x) < 1<<20 { + i := rand.Intn(len(tokens)) + x = append(x, tokens[i]...) + } + return string(x) +} + +var benchInputHard = makeBenchInputHard() + +func benchmarkIndexHard(b *testing.B, sep string) { + for i := 0; i < b.N; i++ { + Index(benchInputHard, sep) + } +} + +func benchmarkCountHard(b *testing.B, sep string) { + for i := 0; i < b.N; i++ { + Count(benchInputHard, sep) + } +} + +func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") } +func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") } +func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") } + +func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") } +func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") } +func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") } + +var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10) +var benchNeedleTorture = Repeat("ABC", 1<<10+1) + +func BenchmarkIndexTorture(b *testing.B) { + for i := 0; i < b.N; i++ { + Index(benchInputTorture, benchNeedleTorture) + } +} + +func BenchmarkCountTorture(b *testing.B) { + for i := 0; i < b.N; i++ { + Count(benchInputTorture, benchNeedleTorture) + } +} + +func BenchmarkCountTortureOverlapping(b *testing.B) { + A := Repeat("ABC", 1<<20) + B := Repeat("ABC", 1<<10) + for i := 0; i < b.N; i++ { + Count(A, B) + } +} + var makeFieldsInput = func() string { x := make([]byte, 1<<20) // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space. @@ -1028,3 +1095,21 @@ func BenchmarkFieldsFunc(b *testing.B) { FieldsFunc(fieldsInput, unicode.IsSpace) } } + +func BenchmarkSplit1(b *testing.B) { + for i := 0; i < b.N; i++ { + Split(benchInputHard, "") + } +} + +func BenchmarkSplit2(b *testing.B) { + for i := 0; i < b.N; i++ { + Split(benchInputHard, "/") + } +} + +func BenchmarkSplit3(b *testing.B) { + for i := 0; i < b.N; i++ { + Split(benchInputHard, "hello") + } +} |