diff options
Diffstat (limited to 'lib/soundex.pl.art')
-rw-r--r-- | lib/soundex.pl.art | 285 |
1 files changed, 285 insertions, 0 deletions
diff --git a/lib/soundex.pl.art b/lib/soundex.pl.art new file mode 100644 index 0000000000..1cc0b9e53c --- /dev/null +++ b/lib/soundex.pl.art @@ -0,0 +1,285 @@ +Article 20106 of comp.lang.perl: +Path: netlabs!news.cerf.net!ihnp4.ucsd.edu!mvb.saic.com!MathWorks.Com!noc.near.net!newshost.meiko.com!not-for-mail +From: mike@meiko.com (Mike Stok) +Newsgroups: comp.lang.perl +Subject: Soundex (again :-) +Date: 23 Mar 1994 19:44:35 -0500 +Organization: Meiko Scientific, Inc., MA +Lines: 272 +Message-ID: <2mqnpj$qk4@hibbert.meiko.com> +NNTP-Posting-Host: hibbert.meiko.com + +Thanks to Rich Pinder <rpinder@hsc.usc.edu> for finding a little bug in my +soundex code I posted a while back. This showed up when he compared it +with the output from Oracle's soundex function, and were caused by leading +characters which were different but shared the same soundex code. + +Here's a fixed shar file... + +Mike + +#!/bin/sh +# This is a shell archive (produced by shar 3.49) +# To extract the files from this archive, save it to a file, remove +# everything above the "!/bin/sh" line above, and type "sh file_name". +# +# made 03/24/1994 00:35 UTC by Mike.Stok@meiko.concord.ma.us +# Source directory /tmp_mnt/develop/sw/misc/mike/soundex +# +# existing files will NOT be overwritten unless -c is specified +# +# This shar contains: +# length mode name +# ------ ---------- ------------------------------------------ +# 1677 -r--r--r-- soundex.pl +# 2408 -r-xr-xr-x soundex.t +# +# ============= soundex.pl ============== +if test -f 'soundex.pl' -a X"$1" != X"-c"; then + echo 'x - skipping soundex.pl (File already exists)' +else +echo 'x - extracting soundex.pl (Text)' +sed 's/^X//' << 'SHAR_EOF' > 'soundex.pl' && +package soundex; +X +;# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $ +;# +;# Implementation of soundex algorithm as described by Knuth in volume +;# 3 of The Art of Computer Programming, with ideas stolen from Ian +;# Phillips <ian@pipex.net>. +;# +;# Mike Stok <Mike.Stok@meiko.concord.ma.us>, 2 March 1994. +;# +;# Knuth's test cases are: +;# +;# Euler, Ellery -> E460 +;# Gauss, Ghosh -> G200 +;# Hilbert, Heilbronn -> H416 +;# Knuth, Kant -> K530 +;# Lloyd, Ladd -> L300 +;# Lukasiewicz, Lissajous -> L222 +;# +;# $Log: soundex.pl,v $ +;# Revision 1.2 1994/03/24 00:30:27 mike +;# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu> +;# in the way I handles leasing characters which were different but had +;# the same soundex code. This showed up comparing it with Oracle's +;# soundex output. +;# +;# Revision 1.1 1994/03/02 13:01:30 mike +;# Initial revision +;# +;# +;############################################################################## +X +;# $soundex'noCode is used to indicate a string doesn't have a soundex +;# code, I like undef other people may want to set it to 'Z000'. +X +$noCode = undef; +X +;# main'soundex +;# +;# usage: +;# +;# @codes = &main'soundex (@wordList); +;# $code = &main'soundex ($word); +;# +;# This strenuously avoids $[ +X +sub main'soundex +{ +X local (@s, $f, $fc, $_) = @_; +X +X foreach (@s) +X { +X tr/a-z/A-Z/; +X tr/A-Z//cd; +X +X if ($_ eq '') +X { +X $_ = $noCode; +X } +X else +X { +X ($f) = /^(.)/; +X tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/; +X ($fc) = /^(.)/; +X s/^$fc+//; +X tr///cs; +X tr/0//d; +X $_ = $f . $_ . '000'; +X s/^(.{4}).*/$1/; +X } +X } +X +X wantarray ? @s : shift @s; +} +X +1; +SHAR_EOF +chmod 0444 soundex.pl || +echo 'restore of soundex.pl failed' +Wc_c="`wc -c < 'soundex.pl'`" +test 1677 -eq "$Wc_c" || + echo 'soundex.pl: original size 1677, current size' "$Wc_c" +fi +# ============= soundex.t ============== +if test -f 'soundex.t' -a X"$1" != X"-c"; then + echo 'x - skipping soundex.t (File already exists)' +else +echo 'x - extracting soundex.t (Text)' +sed 's/^X//' << 'SHAR_EOF' > 'soundex.t' && +#!./perl +;# +;# $Id: soundex.t,v 1.2 1994/03/24 00:30:27 mike Exp $ +;# +;# test module for soundex.pl +;# +;# $Log: soundex.t,v $ +;# Revision 1.2 1994/03/24 00:30:27 mike +;# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu> +;# in the way I handles leasing characters which were different but had +;# the same soundex code. This showed up comparing it with Oracle's +;# soundex output. +;# +;# Revision 1.1 1994/03/02 13:03:02 mike +;# Initial revision +;# +;# +X +require '../lib/soundex.pl'; +X +$test = 0; +print "1..13\n"; +X +while (<DATA>) +{ +X chop; +X next if /^\s*;?#/; +X next if /^\s*$/; +X +X ++$test; +X $bad = 0; +X +X if (/^eval\s+/) +X { +X ($try = $_) =~ s/^eval\s+//; +X +X eval ($try); +X if ($@) +X { +X $bad++; +X print "not ok $test\n"; +X print "# eval '$try' returned $@"; +X } +X } +X elsif (/^\(/) +X { +X ($in, $out) = split (':'); +X +X $try = "\@expect = $out; \@got = &soundex $in;"; +X eval ($try); +X +X if (@expect != @got) +X { +X $bad++; +X print "not ok $test\n"; +X print "# expected ", scalar @expect, " results, got ", scalar @got, "\n"; +X print "# expected (", join (', ', @expect), +X ") got (", join (', ', @got), ")\n"; +X } +X else +X { +X while (@got) +X { +X $expect = shift @expect; +X $got = shift @got; +X +X if ($expect ne $got) +X { +X $bad++; +X print "not ok $test\n"; +X print "# expected $expect, got $got\n"; +X } +X } +X } +X } +X else +X { +X ($in, $out) = split (':'); +X +X $try = "\$expect = $out; \$got = &soundex ($in);"; +X eval ($try); +X +X if ($expect ne $got) +X { +X $bad++; +X print "not ok $test\n"; +X print "# expected $expect, got $got\n"; +X } +X } +X +X print "ok $test\n" unless $bad; +} +X +__END__ +# +# 1..6 +# +# Knuth's test cases, scalar in, scalar out +# +'Euler':'E460' +'Gauss':'G200' +'Hilbert':'H416' +'Knuth':'K530' +'Lloyd':'L300' +'Lukasiewicz':'L222' +# +# 7..8 +# +# check default bad code +# +'2 + 2 = 4':undef +undef:undef +# +# 9 +# +# check array in, array out +# +('Ellery', 'Ghosh', 'Heilbronn', 'Kant', 'Ladd', 'Lissajous'):('E460', 'G200', 'H416', 'K530', 'L300', 'L222') +# +# 10 +# +# check array with explicit undef +# +('Mike', undef, 'Stok'):('M200', undef, 'S320') +# +# 11..12 +# +# check setting $soundex'noCode +# +eval $soundex'noCode = 'Z000'; +('Mike', undef, 'Stok'):('M200', 'Z000', 'S320') +# +# 13 +# +# a subtle difference between me & oracle, spotted by Rich Pinder +# <rpinder@hsc.usc.edu> +# +CZARKOWSKA:C622 +SHAR_EOF +chmod 0555 soundex.t || +echo 'restore of soundex.t failed' +Wc_c="`wc -c < 'soundex.t'`" +test 2408 -eq "$Wc_c" || + echo 'soundex.t: original size 2408, current size' "$Wc_c" +fi +exit 0 + +-- +The "usual disclaimers" apply. | Meiko +Mike Stok | 130C Baker Ave. Ext +Mike.Stok@meiko.concord.ma.us | Concord, MA 01742 +Meiko tel: (508) 371 0088 | + + |