summaryrefslogtreecommitdiff
path: root/libgo/go/strings
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/strings')
-rw-r--r--libgo/go/strings/example_test.go16
-rw-r--r--libgo/go/strings/strings.go126
-rw-r--r--libgo/go/strings/strings_test.go95
3 files changed, 211 insertions, 26 deletions
diff --git a/libgo/go/strings/example_test.go b/libgo/go/strings/example_test.go
index 733caf5f2db..36e0a42fb03 100644
--- a/libgo/go/strings/example_test.go
+++ b/libgo/go/strings/example_test.go
@@ -179,3 +179,19 @@ func ExampleToLower() {
fmt.Println(strings.ToLower("Gopher"))
// Output: gopher
}
+
+func ExampleTrimSuffix() {
+ var s = "Hello, goodbye, etc!"
+ s = strings.TrimSuffix(s, "goodbye, etc!")
+ s = strings.TrimSuffix(s, "planet")
+ fmt.Print(s, "world!")
+ // Output: Hello, world!
+}
+
+func ExampleTrimPrefix() {
+ var s = "Goodbye,, world!"
+ s = strings.TrimPrefix(s, "Goodbye,")
+ s = strings.TrimPrefix(s, "Howdy,")
+ fmt.Print("Hello" + s)
+ // Output: Hello, world!
+}
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go
index b411ba5d8b3..986f6d61ebc 100644
--- a/libgo/go/strings/strings.go
+++ b/libgo/go/strings/strings.go
@@ -26,7 +26,11 @@ func explode(s string, n int) []string {
i, cur := 0, 0
for ; i+1 < n; i++ {
ch, size = utf8.DecodeRuneInString(s[cur:])
- a[i] = string(ch)
+ if ch == utf8.RuneError {
+ a[i] = string(utf8.RuneError)
+ } else {
+ a[i] = s[cur : cur+size]
+ }
cur += size
}
// add the rest, if there is any
@@ -36,27 +40,69 @@ func explode(s string, n int) []string {
return a
}
+// primeRK is the prime base used in Rabin-Karp algorithm.
+const primeRK = 16777619
+
+// hashstr returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func hashstr(sep string) (uint32, uint32) {
+ hash := uint32(0)
+ for i := 0; i < len(sep); i++ {
+ hash = hash*primeRK + uint32(sep[i])
+
+ }
+ var pow, sq uint32 = 1, primeRK
+ for i := len(sep); i > 0; i >>= 1 {
+ if i&1 != 0 {
+ pow *= sq
+ }
+ sq *= sq
+ }
+ return hash, pow
+}
+
// Count counts the number of non-overlapping instances of sep in s.
func Count(s, sep string) int {
- if sep == "" {
- return utf8.RuneCountInString(s) + 1
- }
- c := sep[0]
- l := len(sep)
n := 0
- if l == 1 {
+ // special cases
+ switch {
+ case len(sep) == 0:
+ return utf8.RuneCountInString(s) + 1
+ case len(sep) == 1:
// special case worth making fast
+ c := sep[0]
for i := 0; i < len(s); i++ {
if s[i] == c {
n++
}
}
return n
+ case len(sep) > len(s):
+ return 0
+ case len(sep) == len(s):
+ if sep == s {
+ return 1
+ }
+ return 0
+ }
+ hashsep, pow := hashstr(sep)
+ h := uint32(0)
+ for i := 0; i < len(sep); i++ {
+ h = h*primeRK + uint32(s[i])
+ }
+ lastmatch := 0
+ if h == hashsep && s[:len(sep)] == sep {
+ n++
+ lastmatch = len(sep)
}
- for i := 0; i+l <= len(s); i++ {
- if s[i] == c && s[i:i+l] == sep {
+ for i := len(sep); i < len(s); {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-len(sep)])
+ i++
+ if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep {
n++
- i += l - 1
+ lastmatch = i
}
}
return n
@@ -80,11 +126,11 @@ func ContainsRune(s string, r rune) bool {
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
func Index(s, sep string) int {
n := len(sep)
- if n == 0 {
+ switch {
+ case n == 0:
return 0
- }
- c := sep[0]
- if n == 1 {
+ case n == 1:
+ c := sep[0]
// special case worth making fast
for i := 0; i < len(s); i++ {
if s[i] == c {
@@ -92,11 +138,30 @@ func Index(s, sep string) int {
}
}
return -1
+ case n == len(s):
+ if sep == s {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
}
- // n > 1
- for i := 0; i+n <= len(s); i++ {
- if s[i] == c && s[i:i+n] == sep {
- return i
+ // Hash sep.
+ hashsep, pow := hashstr(sep)
+ var h uint32
+ for i := 0; i < n; i++ {
+ h = h*primeRK + uint32(s[i])
+ }
+ if h == hashsep && s[:n] == sep {
+ return 0
+ }
+ for i := n; i < len(s); {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-n])
+ i++
+ if h == hashsep && s[i-n:i] == sep {
+ return i - n
}
}
return -1
@@ -244,7 +309,8 @@ func SplitAfter(s, sep string) []string {
}
// Fields splits the string s around each instance of one or more consecutive white space
-// characters, returning an array of substrings of s or an empty list if s contains only white space.
+// characters, as defined by unicode.IsSpace, returning an array of substrings of s or an
+// empty list if s contains only white space.
func Fields(s string) []string {
return FieldsFunc(s, unicode.IsSpace)
}
@@ -426,10 +492,10 @@ func isSeparator(r rune) bool {
return unicode.IsSpace(r)
}
-// BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
-
// Title returns a copy of the string s with all Unicode letters that begin words
// mapped to their title case.
+//
+// BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly.
func Title(s string) string {
// Use a closure here to remember state.
// Hackish but effective. Depends on Map scanning in order and calling
@@ -558,6 +624,24 @@ func TrimSpace(s string) string {
return TrimFunc(s, unicode.IsSpace)
}
+// TrimPrefix returns s without the provided leading prefix string.
+// If s doesn't start with prefix, s is returned unchanged.
+func TrimPrefix(s, prefix string) string {
+ if HasPrefix(s, prefix) {
+ return s[len(prefix):]
+ }
+ return s
+}
+
+// TrimSuffix returns s without the provided trailing suffix string.
+// If s doesn't end with suffix, s is returned unchanged.
+func TrimSuffix(s, suffix string) string {
+ if HasSuffix(s, suffix) {
+ return s[:len(s)-len(suffix)]
+ }
+ return s
+}
+
// Replace returns a copy of the string s with the first n
// non-overlapping instances of old replaced by new.
// If n < 0, there is no limit on the number of replacements.
diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go
index 7be41a8dcad..68b658ca466 100644
--- a/libgo/go/strings/strings_test.go
+++ b/libgo/go/strings/strings_test.go
@@ -496,8 +496,8 @@ func TestSpecialCase(t *testing.T) {
func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
var trimTests = []struct {
- f string
- in, cutset, out string
+ f string
+ in, arg, out string
}{
{"Trim", "abba", "a", "bb"},
{"Trim", "abba", "ab", ""},
@@ -520,6 +520,10 @@ var trimTests = []struct {
{"TrimRight", "", "123", ""},
{"TrimRight", "", "", ""},
{"TrimRight", "☺\xc0", "☺", "☺\xc0"},
+ {"TrimPrefix", "aabb", "a", "abb"},
+ {"TrimPrefix", "aabb", "b", "aabb"},
+ {"TrimSuffix", "aabb", "a", "aabb"},
+ {"TrimSuffix", "aabb", "b", "aab"},
}
func TestTrim(t *testing.T) {
@@ -533,12 +537,16 @@ func TestTrim(t *testing.T) {
f = TrimLeft
case "TrimRight":
f = TrimRight
+ case "TrimPrefix":
+ f = TrimPrefix
+ case "TrimSuffix":
+ f = TrimSuffix
default:
t.Errorf("Undefined trim function %s", name)
}
- actual := f(tc.in, tc.cutset)
+ actual := f(tc.in, tc.arg)
if actual != tc.out {
- t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.cutset, actual, tc.out)
+ t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
}
}
}
@@ -959,7 +967,7 @@ var ContainsRuneTests = []struct {
func TestContainsRune(t *testing.T) {
for _, ct := range ContainsRuneTests {
if ContainsRune(ct.str, ct.r) != ct.expected {
- t.Errorf("ContainsRune(%s, %s) = %v, want %v",
+ t.Errorf("ContainsRune(%q, %q) = %v, want %v",
ct.str, ct.r, !ct.expected, ct.expected)
}
}
@@ -993,6 +1001,65 @@ func TestEqualFold(t *testing.T) {
}
}
+func makeBenchInputHard() string {
+ tokens := [...]string{
+ "<a>", "<p>", "<b>", "<strong>",
+ "</a>", "</p>", "</b>", "</strong>",
+ "hello", "world",
+ }
+ x := make([]byte, 0, 1<<20)
+ for len(x) < 1<<20 {
+ i := rand.Intn(len(tokens))
+ x = append(x, tokens[i]...)
+ }
+ return string(x)
+}
+
+var benchInputHard = makeBenchInputHard()
+
+func benchmarkIndexHard(b *testing.B, sep string) {
+ for i := 0; i < b.N; i++ {
+ Index(benchInputHard, sep)
+ }
+}
+
+func benchmarkCountHard(b *testing.B, sep string) {
+ for i := 0; i < b.N; i++ {
+ Count(benchInputHard, sep)
+ }
+}
+
+func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
+func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
+func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
+
+func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
+func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
+func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
+
+var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)
+var benchNeedleTorture = Repeat("ABC", 1<<10+1)
+
+func BenchmarkIndexTorture(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Index(benchInputTorture, benchNeedleTorture)
+ }
+}
+
+func BenchmarkCountTorture(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Count(benchInputTorture, benchNeedleTorture)
+ }
+}
+
+func BenchmarkCountTortureOverlapping(b *testing.B) {
+ A := Repeat("ABC", 1<<20)
+ B := Repeat("ABC", 1<<10)
+ for i := 0; i < b.N; i++ {
+ Count(A, B)
+ }
+}
+
var makeFieldsInput = func() string {
x := make([]byte, 1<<20)
// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
@@ -1028,3 +1095,21 @@ func BenchmarkFieldsFunc(b *testing.B) {
FieldsFunc(fieldsInput, unicode.IsSpace)
}
}
+
+func BenchmarkSplit1(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Split(benchInputHard, "")
+ }
+}
+
+func BenchmarkSplit2(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Split(benchInputHard, "/")
+ }
+}
+
+func BenchmarkSplit3(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Split(benchInputHard, "hello")
+ }
+}