summaryrefslogtreecommitdiff
path: root/src/pkg/strings/search.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/strings/search.go')
-rw-r--r--src/pkg/strings/search.go124
1 files changed, 0 insertions, 124 deletions
diff --git a/src/pkg/strings/search.go b/src/pkg/strings/search.go
deleted file mode 100644
index f77c879c5..000000000
--- a/src/pkg/strings/search.go
+++ /dev/null
@@ -1,124 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strings
-
-// stringFinder efficiently finds strings in a source text. It's implemented
-// using the Boyer-Moore string search algorithm:
-// http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
-// http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged
-// document uses 1-based indexing)
-type stringFinder struct {
- // pattern is the string that we are searching for in the text.
- pattern string
-
- // badCharSkip[b] contains the distance between the last byte of pattern
- // and the rightmost occurrence of b in pattern. If b is not in pattern,
- // badCharSkip[b] is len(pattern).
- //
- // Whenever a mismatch is found with byte b in the text, we can safely
- // shift the matching frame at least badCharSkip[b] until the next time
- // the matching char could be in alignment.
- badCharSkip [256]int
-
- // goodSuffixSkip[i] defines how far we can shift the matching frame given
- // that the suffix pattern[i+1:] matches, but the byte pattern[i] does
- // not. There are two cases to consider:
- //
- // 1. The matched suffix occurs elsewhere in pattern (with a different
- // byte preceding it that we might possibly match). In this case, we can
- // shift the matching frame to align with the next suffix chunk. For
- // example, the pattern "mississi" has the suffix "issi" next occurring
- // (in right-to-left order) at index 1, so goodSuffixSkip[3] ==
- // shift+len(suffix) == 3+4 == 7.
- //
- // 2. If the matched suffix does not occur elsewhere in pattern, then the
- // matching frame may share part of its prefix with the end of the
- // matching suffix. In this case, goodSuffixSkip[i] will contain how far
- // to shift the frame to align this portion of the prefix to the
- // suffix. For example, in the pattern "abcxxxabc", when the first
- // mismatch from the back is found to be in position 3, the matching
- // suffix "xxabc" is not found elsewhere in the pattern. However, its
- // rightmost "abc" (at position 6) is a prefix of the whole pattern, so
- // goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.
- goodSuffixSkip []int
-}
-
-func makeStringFinder(pattern string) *stringFinder {
- f := &stringFinder{
- pattern: pattern,
- goodSuffixSkip: make([]int, len(pattern)),
- }
- // last is the index of the last character in the pattern.
- last := len(pattern) - 1
-
- // Build bad character table.
- // Bytes not in the pattern can skip one pattern's length.
- for i := range f.badCharSkip {
- f.badCharSkip[i] = len(pattern)
- }
- // The loop condition is < instead of <= so that the last byte does not
- // have a zero distance to itself. Finding this byte out of place implies
- // that it is not in the last position.
- for i := 0; i < last; i++ {
- f.badCharSkip[pattern[i]] = last - i
- }
-
- // Build good suffix table.
- // First pass: set each value to the next index which starts a prefix of
- // pattern.
- lastPrefix := last
- for i := last; i >= 0; i-- {
- if HasPrefix(pattern, pattern[i+1:]) {
- lastPrefix = i + 1
- }
- // lastPrefix is the shift, and (last-i) is len(suffix).
- f.goodSuffixSkip[i] = lastPrefix + last - i
- }
- // Second pass: find repeats of pattern's suffix starting from the front.
- for i := 0; i < last; i++ {
- lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1])
- if pattern[i-lenSuffix] != pattern[last-lenSuffix] {
- // (last-i) is the shift, and lenSuffix is len(suffix).
- f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i
- }
- }
-
- return f
-}
-
-func longestCommonSuffix(a, b string) (i int) {
- for ; i < len(a) && i < len(b); i++ {
- if a[len(a)-1-i] != b[len(b)-1-i] {
- break
- }
- }
- return
-}
-
-// next returns the index in text of the first occurrence of the pattern. If
-// the pattern is not found, it returns -1.
-func (f *stringFinder) next(text string) int {
- i := len(f.pattern) - 1
- for i < len(text) {
- // Compare backwards from the end until the first unmatching character.
- j := len(f.pattern) - 1
- for j >= 0 && text[i] == f.pattern[j] {
- i--
- j--
- }
- if j < 0 {
- return i + 1 // match
- }
- i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j])
- }
- return -1
-}
-
-func max(a, b int) int {
- if a > b {
- return a
- }
- return b
-}