summaryrefslogtreecommitdiff
path: root/test/longwrds.awk
blob: 77654bb0c4c18e8afef99a679bf6f9771fb86990 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# From the Gawk manual, modified with a bug fix and the removal of punctuation

# The invoker may preset SORT to choose a different sort command.
# When SORT is unset (or otherwise false), default to sort in the C locale.
BEGIN {
	if (!SORT) {
		SORT = "LC_ALL=C sort"
	}
}

# Remember every word that occurs at least once, as a key of used[].
# For each field, the "word" is the first run of lowercase letters and
# hyphens in the lowercased field; text before or after that run is ignored.
{
	for (n = 1; n <= NF; n++) {
		field = tolower($n)
		# match() returns 0 when nothing matches and sets RLENGTH otherwise.
		start = match(field, /([[:lower:]]|-)+/)
		if (start > 0)
			used[substr(field, start, RLENGTH)] = 1
	}
}

# Emit each distinct word longer than 10 characters and a final count line.
# All output, including the count line, is piped through the SORT command.
END {
	num_long_words = 0
	for (word in used) {
		# Skip anything that is not strictly longer than 10 characters.
		if (length(word) <= 10)
			continue
		num_long_words++
		print word | SORT
	}
	print num_long_words, "long words" | SORT
	# Close the pipe so the sort command finishes and flushes its output.
	close(SORT)
}