diff options
-rw-r--r-- | MANIFEST | 5 | ||||
-rwxr-xr-x | Porting/Maintainers.pl | 15 | ||||
-rw-r--r-- | cpan/Text-Soundex/Changes | 44 | ||||
-rw-r--r-- | cpan/Text-Soundex/README | 134 | ||||
-rw-r--r-- | cpan/Text-Soundex/Soundex.pm | 262 | ||||
-rw-r--r-- | cpan/Text-Soundex/Soundex.xs | 211 | ||||
-rw-r--r-- | cpan/Text-Soundex/t/Soundex.t | 138 |
7 files changed, 0 insertions, 809 deletions
@@ -2430,11 +2430,6 @@ cpan/Text-Balanced/t/09_gentag.t See if Text::Balanced works cpan/Text-ParseWords/lib/Text/ParseWords.pm Perl module to split words on arbitrary delimiter cpan/Text-ParseWords/t/ParseWords.t See if Text::ParseWords works cpan/Text-ParseWords/t/taint.t See if Text::ParseWords works with tainting -cpan/Text-Soundex/Changes Changelog for Text::Soundex -cpan/Text-Soundex/README README for Text::Soundex -cpan/Text-Soundex/Soundex.pm Text::Soundex extension Perl module -cpan/Text-Soundex/Soundex.xs Text::Soundex extension external subroutines -cpan/Text-Soundex/t/Soundex.t test for Text::Soundex cpan/Text-Tabs/CHANGELOG ChangeLog for Tabs+Wrap cpan/Text-Tabs/lib/Text/Tabs.pm Do expand and unexpand cpan/Text-Tabs/lib/Text/Wrap.pm Paragraph formatter diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index e2e8023dff..47459cede9 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -1749,21 +1749,6 @@ use File::Glob qw(:case); 'UPSTREAM' => undef, }, - 'Text::Soundex' => { - 'MAINTAINER' => 'markm', - 'DISTRIBUTION' => 'RJBS/Text-Soundex-3.04.tar.gz', - 'FILES' => q[cpan/Text-Soundex], - 'MAP' => { - '' => 'cpan/Text-Soundex/', - - # XXX these two files are clearly related, - # but they appear to have diverged - # considerably over the years - 'test.pl' => 'cpan/Text-Soundex/t/Soundex.t', - }, - 'UPSTREAM' => undef, - }, - 'Text-Tabs+Wrap' => { 'MAINTAINER' => 'muir', 'DISTRIBUTION' => 'MUIR/modules/Text-Tabs+Wrap-2012.0818.tar.gz', diff --git a/cpan/Text-Soundex/Changes b/cpan/Text-Soundex/Changes deleted file mode 100644 index 364962b192..0000000000 --- a/cpan/Text-Soundex/Changes +++ /dev/null @@ -1,44 +0,0 @@ -Revision history for Perl extension Text::Soundex. - -3.04 Thu Feb 7 15:53:09 EST 2013 <rjbs@cpan.org> - -The module is going to be removed from the core distribution of perl, and will -now warn (under warnings) if loaded from its installed-to-core location. - -3.02 Sun Feb 02 02:54:00 EST 2003 <mark@mielke.cc> - -The U8 type was over-used in 3.00 and 3.01. Now, "U8 *" is used only as a -pointer into the UTF-8 string. Also, unicode now works properly on -Perl 5.6.x as the utf8_to_uv() function is used instead of utf8n_to_uvchr() -when compiled under a version of Perl earlier than 5.8.0. - -3.01 Sun Jan 26 16:30:00 EST 2003 <mark@mielke.cc> - -A bug with non-UTF 8 strings that contain non-ASCII alphabetic characters -was fixed. The soundex_unicode() and soundex_nara_unicode() wrapper -routines were included and the documentation refers the user to the -excellent Text::Unidecode module to perform soundex encodings using -unicode strings. The Perl versions of the routines have been further -optimized, and correct a border case involving non-alphabetic characters -at the beginning of the string. - -3.00 Sun Jan 26 04:08:00 EST 2003 <mark@mielke.cc> - -Updated documentation, simplified the Perl interface, and updated -the XS code to be faster, and to properly work with UTF-8 strings. -UNICODE characters outside the ASCII range (0x00 - 0x7F) are -considered to be non-alphabetic for the purposes of the soundex -algorithms. - -2.10 Sun Feb 15 15:29:38 EST 1998 <mark@mielke.cc> - -I've put in a version of my XS code and fully integrated it with the -existing 100% perl mechanism. The change should be virtually transparent -to the user. XS code is approx 7.5 times faster. - - Mark Mielke - -2.00 Thu Jan 1 16:22:11 1998 <mike@stok.co.uk> - -Incorporated Mark Mielke's rewritten version of the main soundex routine -and made the test.pl file simpler. - diff --git a/cpan/Text-Soundex/README b/cpan/Text-Soundex/README deleted file mode 100644 index 3c3b588e09..0000000000 --- a/cpan/Text-Soundex/README +++ /dev/null @@ -1,134 +0,0 @@ -Text::Soundex - Implementation of the soundex algorithm. - -Basic Usage: - - Soundex is used to do a one way transformation of a name, converting - a character string given as input into a set of codes representing - the identifiable sounds those characters might make in the output. - - For example: - - use Text::Soundex; - - print soundex("Mark"), "\n"; # prints: M620 - print soundex("Marc"), "\n"; # prints: M620 - - print soundex("Hansen"), "\n"; # prints: H525 - print soundex("Hanson"), "\n"; # prints: H525 - print soundex("Henson"), "\n"; # prints: H525 - - In many situations, code such as the following: - - if ($name1 eq $name2) { - ... - } - - Can be substituted with: - - if (soundex($name1) eq soundex($name2)) { - ... - } - -Installation: - - Once the archive has been unpacked then the following steps are needed - to build, test and install the module (to be done in the directory which - contains the Makefile.PL) - - perl Makefile.PL - make - make test - - If the make test succeeds then the next step may need to be run as root - (on a Unix-like system) or with special privileges on other systems. - - make install - - If you do not want to use the XS code (for whatever reason) do the following - instead of the above: - - perl Makefile.PL --no-xs - make - make test - make install - - If any of the tests report 'not ok' and you are running perl 5.6.0 or later - then please contact Mark Mielke <mark@mielke.cc> - -History: - - Version 3.03: - Updated to allow the XS implementation to work properly under an - EBCDIC/EBCDIC-UTF8 character set environment. - - Updated documentation to better describe the history of the - soundex algorithm and how it applies to this module. - - Version 3.02: - 3.01 and 3.00 used the 'U8' type incorrectly causing some strict - compilers to complain or refuse to compile the XS code. Also, Unicode - support did not work properly for Perl 5.6.x. Both of these problems - are now fixed. - - Version 3.01: - A bug with non-UTF 8 strings that contain non-ASCII alphabetic characters - was fixed. The soundex_unicode() and soundex_nara_unicode() wrapper - routines were included and the documentation refers the user to the - excellent Text::Unidecode module to perform soundex encodings using - unicode strings. The Perl versions of the routines have been further - optimized, and correct a border case involving non-alphabetic characters - at the beginning of the string. - - Version 3.00: - Support for UTF-8 strings (unicode strings) is now in place. Note - that this allows UTF-8 strings to be passed to the XS version of - the soundex() routine. The Soundex algorithm treats characters - outside the ascii range (0x00 - 0x7F) as if they were not - alphabetical. - - The interface has been simplified. In order to explicitly use the - non-XS implementation of soundex(): - - use Text::Soundex (); - $code = Text::Soundex::soundex_noxs($name); - - In order to use the NARA soundex algorithm: - - use Text::Soundex 'soundex_nara'; - $code = soundex_nara($name); - - Use of the ':NARA-Ruleset' import directive is now obsolete. To - emulate the old behaviour: - - use Text::Soundex (); - *soundex = \&Text::Soundex::soundex_nara; - $code = soundex($name); - - Version 2.20: - This version includes support for the algorithm used to index - the U.S. Federal Censuses. There is a slight descrepancy in the - definition for a soundex code which is not commonly known or - recognized involved similar sounding letters being seperated - by the characters H or W. This is defined as the NARA ruleset, - as this descrepency was discovered by them. (Calling it "the - US Census ruleset" was too unwieldy...) - - NARA can be found at: - http://www.nara.gov/genealogy/ - - The algorithm used by NARA can be found at: - http://home.utah-inter.net/kinsearch/Soundex.html - - Version 2.00: - This version is a full re-write of the 1.0 engine by Mark Mielke. - The goal was for speed... and this was achieved. There is an optional - XS module which can be used completely transparently by the user - which offers a further speed increase of a factor of more than 7.5X. - - Version 1.00: - This version can be found in the perl core distribution from at - least Perl 5.8.0 and down. It was written by Mike Stok. It can be - identified by the fact that it does not contain a $VERSION - in the beginning of the module, and as well it uses an RCS - tag with a version of 1.x. This version, before some perl5'ish - packaging was introduced, was actually written for perl4. diff --git a/cpan/Text-Soundex/Soundex.pm b/cpan/Text-Soundex/Soundex.pm deleted file mode 100644 index 83a55af43e..0000000000 --- a/cpan/Text-Soundex/Soundex.pm +++ /dev/null @@ -1,262 +0,0 @@ -# -*- perl -*- - -# (c) Copyright 1998-2007 by Mark Mielke -# -# Freedom to use these sources for whatever you want, as long as credit -# is given where credit is due, is hereby granted. You may make modifications -# where you see fit but leave this copyright somewhere visible. As well, try -# to initial any changes you make so that if I like the changes I can -# incorporate them into later versions. -# -# - Mark Mielke <mark@mielke.cc> -# - -package Text::Soundex; -require 5.006; - -use Exporter (); -use XSLoader (); - -use strict; - -use if $] > 5.016, 'deprecate'; - -our $VERSION = '3.04'; -our @EXPORT_OK = qw(soundex soundex_unicode soundex_nara soundex_nara_unicode - $soundex_nocode); -our @EXPORT = qw(soundex soundex_nara $soundex_nocode); -our @ISA = qw(Exporter); - -our $nocode; - -# Previous releases of Text::Soundex made $nocode available as $soundex_nocode. -# For now, this part of the interface is exported and maintained. -# In the feature, $soundex_nocode will be deprecated. -*Text::Soundex::soundex_nocode = \$nocode; - -sub soundex_noxs -{ - # Original Soundex algorithm - - my @results = map { - my $code = uc($_); - $code =~ tr/AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr//cd; - - if (length($code)) { - my $firstchar = substr($code, 0, 1); - $code =~ tr[AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr] - [0000000000000000111111112222222222222222333344555566]s; - ($code = substr($code, 1)) =~ tr/0//d; - substr($firstchar . $code . '000', 0, 4); - } else { - $nocode; - } - } @_; - - wantarray ? @results : $results[0]; -} - -sub soundex_nara -{ - # US census (NARA) algorithm. - - my @results = map { - my $code = uc($_); - $code =~ tr/AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr//cd; - - if (length($code)) { - my $firstchar = substr($code, 0, 1); - $code =~ tr[AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr] - [0000990000009900111111112222222222222222333344555566]s; - $code =~ s/(.)9\1/$1/gs; - ($code = substr($code, 1)) =~ tr/09//d; - substr($firstchar . $code . '000', 0, 4); - } else { - $nocode - } - } @_; - - wantarray ? @results : $results[0]; -} - -sub soundex_unicode -{ - require Text::Unidecode unless defined &Text::Unidecode::unidecode; - soundex(Text::Unidecode::unidecode(@_)); -} - -sub soundex_nara_unicode -{ - require Text::Unidecode unless defined &Text::Unidecode::unidecode; - soundex_nara(Text::Unidecode::unidecode(@_)); -} - -eval { XSLoader::load(__PACKAGE__, $VERSION) }; - -if (defined(&soundex_xs)) { - *soundex = \&soundex_xs; -} else { - *soundex = \&soundex_noxs; - *soundex_xs = sub { - require Carp; - Carp::croak("XS implementation of Text::Soundex::soundex_xs() ". - "could not be loaded"); - }; -} - -1; - -__END__ - -# Implementation of the soundex algorithm. -# -# Some of this documention was written by Mike Stok. -# -# Examples: -# -# Euler, Ellery -> E460 -# Gauss, Ghosh -> G200 -# Hilbert, Heilbronn -> H416 -# Knuth, Kant -> K530 -# Lloyd, Ladd -> L300 -# Lukasiewicz, Lissajous -> L222 -# - -=head1 NAME - -Text::Soundex - Implementation of the soundex algorithm. - -=head1 SYNOPSIS - - use Text::Soundex; - - # Original algorithm. - $code = soundex($name); # Get the soundex code for a name. - @codes = soundex(@names); # Get the list of codes for a list of names. - - # American Soundex variant (NARA) - Used for US census data. - $code = soundex_nara($name); # Get the soundex code for a name. - @codes = soundex_nara(@names); # Get the list of codes for a list of names. - - # Redefine the value that soundex() will return if the input string - # contains no identifiable sounds within it. - $Text::Soundex::nocode = 'Z000'; - -=head1 DESCRIPTION - -Soundex is a phonetic algorithm for indexing names by sound, as -pronounced in English. The goal is for names with the same -pronunciation to be encoded to the same representation so that they -can be matched despite minor differences in spelling. Soundex is the -most widely known of all phonetic algorithms and is often used -(incorrectly) as a synonym for "phonetic algorithm". Improvements to -Soundex are the basis for many modern phonetic algorithms. (Wikipedia, -2007) - -This module implements the original soundex algorithm developed by -Robert Russell and Margaret Odell, patented in 1918 and 1922, as well -as a variation called "American Soundex" used for US census data, and -current maintained by the National Archives and Records Administration -(NARA). - -The soundex algorithm may be recognized from Donald Knuth's -B<The Art of Computer Programming>. The algorithm described by -Knuth is the NARA algorithm. - -The value returned for strings which have no soundex encoding is -defined using C<$Text::Soundex::nocode>. The default value is C<undef>, -however values such as C<'Z000'> are commonly used alternatives. - -For backward compatibility with older versions of this module the -C<$Text::Soundex::nocode> is exported into the caller's namespace as -C<$soundex_nocode>. - -In scalar context, C<soundex()> returns the soundex code of its first -argument. In list context, a list is returned in which each element is the -soundex code for the corresponding argument passed to C<soundex()>. For -example, the following code assigns @codes the value C<('M200', 'S320')>: - - @codes = soundex qw(Mike Stok); - -To use C<Text::Soundex> to generate codes that can be used to search one -of the publically available US Censuses, a variant of the soundex -algorithm must be used: - - use Text::Soundex; - $code = soundex_nara($name); - -An example of where these algorithm differ follows: - - use Text::Soundex; - print soundex("Ashcraft"), "\n"; # prints: A226 - print soundex_nara("Ashcraft"), "\n"; # prints: A261 - -=head1 EXAMPLES - -Donald Knuth's examples of names and the soundex codes they map to -are listed below: - - Euler, Ellery -> E460 - Gauss, Ghosh -> G200 - Hilbert, Heilbronn -> H416 - Knuth, Kant -> K530 - Lloyd, Ladd -> L300 - Lukasiewicz, Lissajous -> L222 - -so: - - $code = soundex 'Knuth'; # $code contains 'K530' - @list = soundex qw(Lloyd Gauss); # @list contains 'L300', 'G200' - -=head1 LIMITATIONS - -As the soundex algorithm was originally used a B<long> time ago in the US -it considers only the English alphabet and pronunciation. In particular, -non-ASCII characters will be ignored. The recommended method of dealing -with characters that have accents, or other unicode characters, is to use -the Text::Unidecode module available from CPAN. Either use the module -explicitly: - - use Text::Soundex; - use Text::Unidecode; - - print soundex(unidecode("Fran\xE7ais")), "\n"; # Prints "F652\n" - -Or use the convenient wrapper routine: - - use Text::Soundex 'soundex_unicode'; - - print soundex_unicode("Fran\xE7ais"), "\n"; # Prints "F652\n" - -Since the soundex algorithm maps a large space (strings of arbitrary -length) onto a small space (single letter plus 3 digits) no inference -can be made about the similarity of two strings which end up with the -same soundex code. For example, both C<Hilbert> and C<Heilbronn> end -up with a soundex code of C<H416>. - -=head1 MAINTAINER - -This module is currently maintain by Mark Mielke (C<mark@mielke.cc>). - -=head1 HISTORY - -Version 3 is a significant update to provide support for versions of -Perl later than Perl 5.004. Specifically, the XS version of the -soundex() subroutine understands strings that are encoded using UTF-8 -(unicode strings). - -Version 2 of this module was a re-write by Mark Mielke (C<mark@mielke.cc>) -to improve the speed of the subroutines. The XS version of the soundex() -subroutine was introduced in 2.00. - -Version 1 of this module was written by Mike Stok (C<mike@stok.co.uk>) -and was included into the Perl core library set. - -Dave Carlsen (C<dcarlsen@csranet.com>) made the request for the NARA -algorithm to be included. The NARA soundex page can be viewed at: -C<http://www.nara.gov/genealogy/soundex/soundex.html> - -Ian Phillips (C<ian@pipex.net>) and Rich Pinder (C<rpinder@hsc.usc.edu>) -supplied ideas and spotted mistakes for v1.x. - -=cut diff --git a/cpan/Text-Soundex/Soundex.xs b/cpan/Text-Soundex/Soundex.xs deleted file mode 100644 index d14247132b..0000000000 --- a/cpan/Text-Soundex/Soundex.xs +++ /dev/null @@ -1,211 +0,0 @@ -/* -*- c -*- */ - -/* (c) Copyright 1998-2003 by Mark Mielke - * - * Freedom to use these sources for whatever you want, as long as credit - * is given where credit is due, is hereby granted. You may make modifications - * where you see fit but leave this copyright somewhere visible. As well try - * to initial any changes you make so that if i like the changes i can - * incorporate them into any later versions of mine. - * - * - Mark Mielke <mark@mielke.cc> - */ - -#include "EXTERN.h" -#include "perl.h" -#include "XSUB.h" - -#define SOUNDEX_ACCURACY (4) /* The maximum code length... (should be>=2) */ - -#if !(PERL_REVISION >= 5 && PERL_VERSION >= 8) -# define utf8n_to_uvchr utf8_to_uv -#endif - -static char sv_soundex_table[0x100]; -static void sv_soundex_initialize (void) -{ - memset(&sv_soundex_table[0], '\0', sizeof(sv_soundex_table)); - sv_soundex_table['A'] = '0'; - sv_soundex_table['a'] = '0'; - sv_soundex_table['E'] = '0'; - sv_soundex_table['e'] = '0'; - sv_soundex_table['H'] = '0'; - sv_soundex_table['h'] = '0'; - sv_soundex_table['I'] = '0'; - sv_soundex_table['i'] = '0'; - sv_soundex_table['O'] = '0'; - sv_soundex_table['o'] = '0'; - sv_soundex_table['U'] = '0'; - sv_soundex_table['u'] = '0'; - sv_soundex_table['W'] = '0'; - sv_soundex_table['w'] = '0'; - sv_soundex_table['Y'] = '0'; - sv_soundex_table['y'] = '0'; - sv_soundex_table['B'] = '1'; - sv_soundex_table['b'] = '1'; - sv_soundex_table['F'] = '1'; - sv_soundex_table['f'] = '1'; - sv_soundex_table['P'] = '1'; - sv_soundex_table['p'] = '1'; - sv_soundex_table['V'] = '1'; - sv_soundex_table['v'] = '1'; - sv_soundex_table['C'] = '2'; - sv_soundex_table['c'] = '2'; - sv_soundex_table['G'] = '2'; - sv_soundex_table['g'] = '2'; - sv_soundex_table['J'] = '2'; - sv_soundex_table['j'] = '2'; - sv_soundex_table['K'] = '2'; - sv_soundex_table['k'] = '2'; - sv_soundex_table['Q'] = '2'; - sv_soundex_table['q'] = '2'; - sv_soundex_table['S'] = '2'; - sv_soundex_table['s'] = '2'; - sv_soundex_table['X'] = '2'; - sv_soundex_table['x'] = '2'; - sv_soundex_table['Z'] = '2'; - sv_soundex_table['z'] = '2'; - sv_soundex_table['D'] = '3'; - sv_soundex_table['d'] = '3'; - sv_soundex_table['T'] = '3'; - sv_soundex_table['t'] = '3'; - sv_soundex_table['L'] = '4'; - sv_soundex_table['l'] = '4'; - sv_soundex_table['M'] = '5'; - sv_soundex_table['m'] = '5'; - sv_soundex_table['N'] = '5'; - sv_soundex_table['n'] = '5'; - sv_soundex_table['R'] = '6'; - sv_soundex_table['r'] = '6'; -} - -static SV *sv_soundex (SV* source) -{ - char *source_p; - char *source_end; - - { - STRLEN source_len; - source_p = SvPV(source, source_len); - source_end = &source_p[source_len]; - } - - while (source_p != source_end) - { - char codepart_last = sv_soundex_table[(unsigned char) *source_p]; - - if (codepart_last != '\0') - { - SV *code = newSV(SOUNDEX_ACCURACY); - char *code_p = SvPVX(code); - char *code_end = &code_p[SOUNDEX_ACCURACY]; - - SvCUR_set(code, SOUNDEX_ACCURACY); - SvPOK_only(code); - - *code_p++ = toupper(*source_p++); - - while (source_p != source_end && code_p != code_end) - { - char c = *source_p++; - char codepart = sv_soundex_table[(unsigned char) c]; - - if (codepart != '\0') - if (codepart != codepart_last && (codepart_last = codepart) != '0') - *code_p++ = codepart; - } - - while (code_p != code_end) - *code_p++ = '0'; - - *code_end = '\0'; - - return code; - } - - source_p++; - } - - return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE)); -} - -static SV *sv_soundex_utf8 (SV* source) -{ - U8 *source_p; - U8 *source_end; - - { - STRLEN source_len; - source_p = (U8 *) SvPV(source, source_len); - source_end = &source_p[source_len]; - } - - while (source_p < source_end) - { - STRLEN offset; - UV c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0); - char codepart_last = (c <= 0xFF) ? sv_soundex_table[c] : '\0'; - source_p = (offset >= 1) ? &source_p[offset] : source_end; - - if (codepart_last != '\0') - { - SV *code = newSV(SOUNDEX_ACCURACY); - char *code_p = SvPVX(code); - char *code_end = &code_p[SOUNDEX_ACCURACY]; - - SvCUR_set(code, SOUNDEX_ACCURACY); - SvPOK_only(code); - - *code_p++ = toupper(c); - - while (source_p != source_end && code_p != code_end) - { - char codepart; - c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0); - codepart = (c <= 0xFF) ? sv_soundex_table[c] : '\0'; - source_p = (offset >= 1) ? &source_p[offset] : source_end; - - if (codepart != '\0') - if (codepart != codepart_last && (codepart_last = codepart) != '0') - *code_p++ = codepart; - } - - while (code_p != code_end) - *code_p++ = '0'; - - *code_end = '\0'; - - return code; - } - - source_p++; - } - - return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE)); -} - -MODULE = Text::Soundex PACKAGE = Text::Soundex - -PROTOTYPES: DISABLE - -void -soundex_xs (...) -INIT: -{ - sv_soundex_initialize(); -} -PPCODE: -{ - int i; - for (i = 0; i < items; i++) - { - SV *sv = ST(i); - - if (DO_UTF8(sv)) - sv = sv_soundex_utf8(sv); - else - sv = sv_soundex(sv); - - PUSHs(sv_2mortal(sv)); - } -} diff --git a/cpan/Text-Soundex/t/Soundex.t b/cpan/Text-Soundex/t/Soundex.t deleted file mode 100644 index a48fb4abe0..0000000000 --- a/cpan/Text-Soundex/t/Soundex.t +++ /dev/null @@ -1,138 +0,0 @@ -#!./perl -# -# $Id: soundex.t,v 1.2 1994/03/24 00:30:27 mike Exp $ -# -# test module for soundex.pl -# -# $Log: soundex.t,v $ -# Revision 1.2 1994/03/24 00:30:27 mike -# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu> -# in the way I handles leasing characters which were different but had -# the same soundex code. This showed up comparing it with Oracle's -# soundex output. -# -# Revision 1.1 1994/03/02 13:03:02 mike -# Initial revision -# -# - -use Text::Soundex; - -$test = 0; -print "1..13\n"; - -while (<DATA>) -{ - chop; - next if /^\s*;?#/; - next if /^\s*$/; - - ++$test; - $bad = 0; - - if (/^eval\s+/) - { - ($try = $_) =~ s/^eval\s+//; - - eval ($try); - if ($@) - { - $bad++; - print "not ok $test\n"; - print "# eval '$try' returned $@"; - } - } - elsif (/^\(/) - { - ($in, $out) = split (':'); - - $try = "\@expect = $out; \@got = &soundex $in;"; - eval ($try); - - if (@expect != @got) - { - $bad++; - print "not ok $test\n"; - print "# expected ", scalar @expect, " results, got ", scalar @got, "\n"; - print "# expected (", join (', ', @expect), - ") got (", join (', ', @got), ")\n"; - } - else - { - while (@got) - { - $expect = shift @expect; - $got = shift @got; - - if ($expect ne $got) - { - $bad++; - print "not ok $test\n"; - print "# expected $expect, got $got\n"; - } - } - } - } - else - { - ($in, $out) = split (':'); - - $try = "\$expect = $out; \$got = &soundex ($in);"; - eval ($try); - - if ($expect ne $got) - { - $bad++; - print "not ok $test\n"; - print "# expected $expect, got $got\n"; - } - } - - print "ok $test\n" unless $bad; -} - -__END__ -# -# 1..6 -# -# Knuth's test cases, scalar in, scalar out -# -'Euler':'E460' -'Gauss':'G200' -'Hilbert':'H416' -'Knuth':'K530' -'Lloyd':'L300' -'Lukasiewicz':'L222' -# -# 7..8 -# -# check default bad code -# -'2 + 2 = 4':undef -undef:undef -# -# 9 -# -# check array in, array out -# -('Ellery', 'Ghosh', 'Heilbronn', 'Kant', 'Ladd', 'Lissajous'):('E460', 'G200', 'H416', 'K530', 'L300', 'L222') -# -# 10 -# -# check array with explicit undef -# -('Mike', undef, 'Stok'):('M200', undef, 'S320') -# -# 11..12 -# -# check setting $Text::Soundex::noCode -# -eval $soundex_nocode = 'Z000'; -('Mike', undef, 'Stok'):('M200', 'Z000', 'S320') -# -# 13 -# -# a subtle difference between me & oracle, spotted by Rich Pinder -# <rpinder@hsc.usc.edu> -# -CZARKOWSKA:C622 |