build: move package sources from src/pkg to src

Preparation was in CL 134570043. This CL contains only the effect of 'hg mv src/pkg/* src'. For more about the move, see golang.org/s/go14nopkg.
author: Russ Cox <rsc@golang.org> 2014-09-08 00:08:51 -0400
committer: Russ Cox <rsc@golang.org> 2014-09-08 00:08:51 -0400
commit: 8528da672cc093d4dd06732819abc1f7b6b5a46e (patch)
tree: 334be80d4a4c85b77db6f6fdb67cbf0528cba5f5 /src/strings/search.go
parent: 73bcb69f272cbf34ddcc9daa56427a8683b5a95d (diff)
download: go-8528da672cc093d4dd06732819abc1f7b6b5a46e.tar.gz
1 files changed, 124 insertions, 0 deletions
diff --git a/src/strings/search.go b/src/strings/search.go
new file mode 100644
index 000000000..f77c879c5
--- /dev/null
+++ b/src/strings/search.go
@@ -0,0 +1,124 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+// stringFinder efficiently finds strings in a source text. It's implemented
+// using the Boyer-Moore string search algorithm:
+// http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
+// http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged
+// document uses 1-based indexing)
+type stringFinder struct {
+	// pattern is the string that we are searching for in the text.
+	pattern string
+
+	// badCharSkip[b] contains the distance between the last byte of pattern
+	// and the rightmost occurrence of b in pattern. If b is not in pattern,
+	// badCharSkip[b] is len(pattern).
+	//
+	// Whenever a mismatch is found with byte b in the text, we can safely
+	// shift the matching frame at least badCharSkip[b] until the next time
+	// the matching char could be in alignment.
+	badCharSkip [256]int
+
+	// goodSuffixSkip[i] defines how far we can shift the matching frame given
+	// that the suffix pattern[i+1:] matches, but the byte pattern[i] does
+	// not. There are two cases to consider:
+	//
+	// 1. The matched suffix occurs elsewhere in pattern (with a different
+	// byte preceding it that we might possibly match). In this case, we can
+	// shift the matching frame to align with the next suffix chunk. For
+	// example, the pattern "mississi" has the suffix "issi" next occurring
+	// (in right-to-left order) at index 1, so goodSuffixSkip[3] ==
+	// shift+len(suffix) == 3+4 == 7.
+	//
+	// 2. If the matched suffix does not occur elsewhere in pattern, then the
+	// matching frame may share part of its prefix with the end of the
+	// matching suffix. In this case, goodSuffixSkip[i] will contain how far
+	// to shift the frame to align this portion of the prefix to the
+	// suffix. For example, in the pattern "abcxxxabc", when the first
+	// mismatch from the back is found to be in position 3, the matching
+	// suffix "xxabc" is not found elsewhere in the pattern. However, its
+	// rightmost "abc" (at position 6) is a prefix of the whole pattern, so
+	// goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.
+	goodSuffixSkip []int
+}
+
+func makeStringFinder(pattern string) *stringFinder {
+	f := &stringFinder{
+		pattern:        pattern,
+		goodSuffixSkip: make([]int, len(pattern)),
+	}
+	// last is the index of the last character in the pattern.
+	last := len(pattern) - 1
+
+	// Build bad character table.
+	// Bytes not in the pattern can skip one pattern's length.
+	for i := range f.badCharSkip {
+		f.badCharSkip[i] = len(pattern)
+	}
+	// The loop condition is < instead of <= so that the last byte does not
+	// have a zero distance to itself. Finding this byte out of place implies
+	// that it is not in the last position.
+	for i := 0; i < last; i++ {
+		f.badCharSkip[pattern[i]] = last - i
+	}
+
+	// Build good suffix table.
+	// First pass: set each value to the next index which starts a prefix of
+	// pattern.
+	lastPrefix := last
+	for i := last; i >= 0; i-- {
+		if HasPrefix(pattern, pattern[i+1:]) {
+			lastPrefix = i + 1
+		}
+		// lastPrefix is the shift, and (last-i) is len(suffix).
+		f.goodSuffixSkip[i] = lastPrefix + last - i
+	}
+	// Second pass: find repeats of pattern's suffix starting from the front.
+	for i := 0; i < last; i++ {
+		lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1])
+		if pattern[i-lenSuffix] != pattern[last-lenSuffix] {
+			// (last-i) is the shift, and lenSuffix is len(suffix).
+			f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i
+		}
+	}
+
+	return f
+}
+
+func longestCommonSuffix(a, b string) (i int) {
+	for ; i < len(a) && i < len(b); i++ {
+		if a[len(a)-1-i] != b[len(b)-1-i] {
+			break
+		}
+	}
+	return
+}
+
+// next returns the index in text of the first occurrence of the pattern. If
+// the pattern is not found, it returns -1.
+func (f *stringFinder) next(text string) int {
+	i := len(f.pattern) - 1
+	for i < len(text) {
+		// Compare backwards from the end until the first unmatching character.
+		j := len(f.pattern) - 1
+		for j >= 0 && text[i] == f.pattern[j] {
+			i--
+			j--
+		}
+		if j < 0 {
+			return i + 1 // match
+		}
+		i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j])
+	}
+	return -1
+}
+
+func max(a, b int) int {
+	if a > b {
+		return a
+	}
+	return b
+}
author	Russ Cox <rsc@golang.org>	2014-09-08 00:08:51 -0400
committer	Russ Cox <rsc@golang.org>	2014-09-08 00:08:51 -0400
commit	8528da672cc093d4dd06732819abc1f7b6b5a46e (patch)
tree	334be80d4a4c85b77db6f6fdb67cbf0528cba5f5 /src/strings/search.go
parent	73bcb69f272cbf34ddcc9daa56427a8683b5a95d (diff)
download	go-8528da672cc093d4dd06732819abc1f7b6b5a46e.tar.gz