summaryrefslogtreecommitdiff
path: root/lib/soundex.pl.art
diff options
context:
space:
mode:
Diffstat (limited to 'lib/soundex.pl.art')
-rw-r--r--lib/soundex.pl.art285
1 files changed, 285 insertions, 0 deletions
diff --git a/lib/soundex.pl.art b/lib/soundex.pl.art
new file mode 100644
index 0000000000..1cc0b9e53c
--- /dev/null
+++ b/lib/soundex.pl.art
@@ -0,0 +1,285 @@
+Article 20106 of comp.lang.perl:
+Path: netlabs!news.cerf.net!ihnp4.ucsd.edu!mvb.saic.com!MathWorks.Com!noc.near.net!newshost.meiko.com!not-for-mail
+From: mike@meiko.com (Mike Stok)
+Newsgroups: comp.lang.perl
+Subject: Soundex (again :-)
+Date: 23 Mar 1994 19:44:35 -0500
+Organization: Meiko Scientific, Inc., MA
+Lines: 272
+Message-ID: <2mqnpj$qk4@hibbert.meiko.com>
+NNTP-Posting-Host: hibbert.meiko.com
+
+Thanks to Rich Pinder <rpinder@hsc.usc.edu> for finding a little bug in my
+soundex code I posted a while back. This showed up when he compared it
+with the output from Oracle's soundex function, and were caused by leading
+characters which were different but shared the same soundex code.
+
+Here's a fixed shar file...
+
+Mike
+
+#!/bin/sh
+# This is a shell archive (produced by shar 3.49)
+# To extract the files from this archive, save it to a file, remove
+# everything above the "!/bin/sh" line above, and type "sh file_name".
+#
+# made 03/24/1994 00:35 UTC by Mike.Stok@meiko.concord.ma.us
+# Source directory /tmp_mnt/develop/sw/misc/mike/soundex
+#
+# existing files will NOT be overwritten unless -c is specified
+#
+# This shar contains:
+# length mode name
+# ------ ---------- ------------------------------------------
+# 1677 -r--r--r-- soundex.pl
+# 2408 -r-xr-xr-x soundex.t
+#
+# ============= soundex.pl ==============
+if test -f 'soundex.pl' -a X"$1" != X"-c"; then
+ echo 'x - skipping soundex.pl (File already exists)'
+else
+echo 'x - extracting soundex.pl (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'soundex.pl' &&
+package soundex;
+X
+;# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $
+;#
+;# Implementation of soundex algorithm as described by Knuth in volume
+;# 3 of The Art of Computer Programming, with ideas stolen from Ian
+;# Phillips <ian@pipex.net>.
+;#
+;# Mike Stok <Mike.Stok@meiko.concord.ma.us>, 2 March 1994.
+;#
+;# Knuth's test cases are:
+;#
+;# Euler, Ellery -> E460
+;# Gauss, Ghosh -> G200
+;# Hilbert, Heilbronn -> H416
+;# Knuth, Kant -> K530
+;# Lloyd, Ladd -> L300
+;# Lukasiewicz, Lissajous -> L222
+;#
+;# $Log: soundex.pl,v $
+;# Revision 1.2 1994/03/24 00:30:27 mike
+;# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu>
+;# in the way I handles leasing characters which were different but had
+;# the same soundex code. This showed up comparing it with Oracle's
+;# soundex output.
+;#
+;# Revision 1.1 1994/03/02 13:01:30 mike
+;# Initial revision
+;#
+;#
+;##############################################################################
+X
+;# $soundex'noCode is used to indicate a string doesn't have a soundex
+;# code, I like undef other people may want to set it to 'Z000'.
+X
+$noCode = undef;
+X
+;# main'soundex
+;#
+;# usage:
+;#
+;# @codes = &main'soundex (@wordList);
+;# $code = &main'soundex ($word);
+;#
+;# This strenuously avoids $[
+X
+sub main'soundex
+{
+X local (@s, $f, $fc, $_) = @_;
+X
+X foreach (@s)
+X {
+X tr/a-z/A-Z/;
+X tr/A-Z//cd;
+X
+X if ($_ eq '')
+X {
+X $_ = $noCode;
+X }
+X else
+X {
+X ($f) = /^(.)/;
+X tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/;
+X ($fc) = /^(.)/;
+X s/^$fc+//;
+X tr///cs;
+X tr/0//d;
+X $_ = $f . $_ . '000';
+X s/^(.{4}).*/$1/;
+X }
+X }
+X
+X wantarray ? @s : shift @s;
+}
+X
+1;
+SHAR_EOF
+chmod 0444 soundex.pl ||
+echo 'restore of soundex.pl failed'
+Wc_c="`wc -c < 'soundex.pl'`"
+test 1677 -eq "$Wc_c" ||
+ echo 'soundex.pl: original size 1677, current size' "$Wc_c"
+fi
+# ============= soundex.t ==============
+if test -f 'soundex.t' -a X"$1" != X"-c"; then
+ echo 'x - skipping soundex.t (File already exists)'
+else
+echo 'x - extracting soundex.t (Text)'
+sed 's/^X//' << 'SHAR_EOF' > 'soundex.t' &&
+#!./perl
+;#
+;# $Id: soundex.t,v 1.2 1994/03/24 00:30:27 mike Exp $
+;#
+;# test module for soundex.pl
+;#
+;# $Log: soundex.t,v $
+;# Revision 1.2 1994/03/24 00:30:27 mike
+;# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu>
+;# in the way I handles leasing characters which were different but had
+;# the same soundex code. This showed up comparing it with Oracle's
+;# soundex output.
+;#
+;# Revision 1.1 1994/03/02 13:03:02 mike
+;# Initial revision
+;#
+;#
+X
+require '../lib/soundex.pl';
+X
+$test = 0;
+print "1..13\n";
+X
+while (<DATA>)
+{
+X chop;
+X next if /^\s*;?#/;
+X next if /^\s*$/;
+X
+X ++$test;
+X $bad = 0;
+X
+X if (/^eval\s+/)
+X {
+X ($try = $_) =~ s/^eval\s+//;
+X
+X eval ($try);
+X if ($@)
+X {
+X $bad++;
+X print "not ok $test\n";
+X print "# eval '$try' returned $@";
+X }
+X }
+X elsif (/^\(/)
+X {
+X ($in, $out) = split (':');
+X
+X $try = "\@expect = $out; \@got = &soundex $in;";
+X eval ($try);
+X
+X if (@expect != @got)
+X {
+X $bad++;
+X print "not ok $test\n";
+X print "# expected ", scalar @expect, " results, got ", scalar @got, "\n";
+X print "# expected (", join (', ', @expect),
+X ") got (", join (', ', @got), ")\n";
+X }
+X else
+X {
+X while (@got)
+X {
+X $expect = shift @expect;
+X $got = shift @got;
+X
+X if ($expect ne $got)
+X {
+X $bad++;
+X print "not ok $test\n";
+X print "# expected $expect, got $got\n";
+X }
+X }
+X }
+X }
+X else
+X {
+X ($in, $out) = split (':');
+X
+X $try = "\$expect = $out; \$got = &soundex ($in);";
+X eval ($try);
+X
+X if ($expect ne $got)
+X {
+X $bad++;
+X print "not ok $test\n";
+X print "# expected $expect, got $got\n";
+X }
+X }
+X
+X print "ok $test\n" unless $bad;
+}
+X
+__END__
+#
+# 1..6
+#
+# Knuth's test cases, scalar in, scalar out
+#
+'Euler':'E460'
+'Gauss':'G200'
+'Hilbert':'H416'
+'Knuth':'K530'
+'Lloyd':'L300'
+'Lukasiewicz':'L222'
+#
+# 7..8
+#
+# check default bad code
+#
+'2 + 2 = 4':undef
+undef:undef
+#
+# 9
+#
+# check array in, array out
+#
+('Ellery', 'Ghosh', 'Heilbronn', 'Kant', 'Ladd', 'Lissajous'):('E460', 'G200', 'H416', 'K530', 'L300', 'L222')
+#
+# 10
+#
+# check array with explicit undef
+#
+('Mike', undef, 'Stok'):('M200', undef, 'S320')
+#
+# 11..12
+#
+# check setting $soundex'noCode
+#
+eval $soundex'noCode = 'Z000';
+('Mike', undef, 'Stok'):('M200', 'Z000', 'S320')
+#
+# 13
+#
+# a subtle difference between me & oracle, spotted by Rich Pinder
+# <rpinder@hsc.usc.edu>
+#
+CZARKOWSKA:C622
+SHAR_EOF
+chmod 0555 soundex.t ||
+echo 'restore of soundex.t failed'
+Wc_c="`wc -c < 'soundex.t'`"
+test 2408 -eq "$Wc_c" ||
+ echo 'soundex.t: original size 2408, current size' "$Wc_c"
+fi
+exit 0
+
+--
+The "usual disclaimers" apply. | Meiko
+Mike Stok | 130C Baker Ave. Ext
+Mike.Stok@meiko.concord.ma.us | Concord, MA 01742
+Meiko tel: (508) 371 0088 |
+
+