summaryrefslogtreecommitdiff
path: root/libgo/go/index/suffixarray/suffixarray_test.go
blob: 8280750eddaf6ec505ae57eec89f78ff9b4caf6e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package suffixarray

import (
	"container/vector"
	"sort"
	"strings"
	"testing"
)


type testCase struct {
	name    string   // name of test case
	source  string   // source to index
	lookups []string // strings to lookup
}


var testCases = []testCase{
	{
		"empty string",
		"",
		[]string{
			"",
			"foo",
		},
	},

	{
		"all a's",
		"aaaaaaaaaa", // 10 a's
		[]string{
			"",
			"a",
			"aa",
			"aaa",
			"aaaa",
			"aaaaa",
			"aaaaaa",
			"aaaaaaa",
			"aaaaaaaa",
			"aaaaaaaaa",
			"aaaaaaaaaa",
			"aaaaaaaaaaa", // 11 a's
		},
	},

	{
		"abc",
		"abc",
		[]string{
			"a",
			"b",
			"c",
			"ab",
			"bc",
			"abc",
		},
	},

	{
		"barbara*3",
		"barbarabarbarabarbara",
		[]string{
			"a",
			"bar",
			"rab",
			"arab",
			"barbar",
		},
	},

	{
		"typing drill",
		"Now is the time for all good men to come to the aid of their country.",
		[]string{
			"Now",
			"the time",
			"to come the aid",
			"is the time for all good men to come to the aid of their",
		},
	},
}


// find all occurences of s in source; report at most n occurences
func find(src, s string, n int) []int {
	var res vector.IntVector
	if s != "" && n != 0 {
		// find at most n occurences of s in src
		for i := -1; n < 0 || len(res) < n; {
			j := strings.Index(src[i+1:], s)
			if j < 0 {
				break
			}
			i += j + 1
			res.Push(i)
		}
	}
	return res
}


func testLookups(t *testing.T, src string, x *Index, tc *testCase, n int) {
	for _, s := range tc.lookups {
		res := x.Lookup([]byte(s), n)
		exp := find(tc.source, s, n)

		// check that the lengths match
		if len(res) != len(exp) {
			t.Errorf("test %q, lookup %q (n = %d): expected %d results; got %d", tc.name, s, n, len(exp), len(res))
		}

		// if n >= 0 the number of results is limited --- unless n >= all results,
		// we may obtain different positions from the Index and from find (because
		// Index may not find the results in the same order as find) => in general
		// we cannot simply check that the res and exp lists are equal

		// check that there are no duplicates
		sort.SortInts(res)
		for i, r := range res {
			if i > 0 && res[i-1] == r {
				t.Errorf("test %q, lookup %q, result %d (n = %d): found duplicate index %d", tc.name, s, i, n, r)
			}
		}

		// check that each result is in fact a correct match
		for i, r := range res {
			if r < 0 || len(src) <= r {
				t.Errorf("test %q, lookup %q, result %d (n = %d): index %d out of range [0, %d[", tc.name, s, i, n, r, len(src))
			} else if !strings.HasPrefix(src[r:], s) {
				t.Errorf("test %q, lookup %q, result %d (n = %d): index %d not a match", tc.name, s, i, n, r)
			}
		}

		if n < 0 {
			// all results computed - sorted res and exp must be equal
			for i, r := range res {
				e := exp[i]
				if r != e {
					t.Errorf("test %q, lookup %q, result %d: expected index %d; got %d", tc.name, s, i, e, r)
					continue
				}
			}
		}
	}
}


func TestIndex(t *testing.T) {
	for _, tc := range testCases {
		x := New([]byte(tc.source))
		testLookups(t, tc.source, x, &tc, 0)
		testLookups(t, tc.source, x, &tc, 1)
		testLookups(t, tc.source, x, &tc, 10)
		testLookups(t, tc.source, x, &tc, -1)
	}
}