summaryrefslogtreecommitdiff
path: root/cpan/Text-Soundex/Soundex.pm
diff options
context:
space:
mode:
Diffstat (limited to 'cpan/Text-Soundex/Soundex.pm')
-rw-r--r--cpan/Text-Soundex/Soundex.pm262
1 files changed, 0 insertions, 262 deletions
diff --git a/cpan/Text-Soundex/Soundex.pm b/cpan/Text-Soundex/Soundex.pm
deleted file mode 100644
index 83a55af43e..0000000000
--- a/cpan/Text-Soundex/Soundex.pm
+++ /dev/null
@@ -1,262 +0,0 @@
-# -*- perl -*-
-
-# (c) Copyright 1998-2007 by Mark Mielke
-#
-# Freedom to use these sources for whatever you want, as long as credit
-# is given where credit is due, is hereby granted. You may make modifications
-# where you see fit but leave this copyright somewhere visible. As well, try
-# to initial any changes you make so that if I like the changes I can
-# incorporate them into later versions.
-#
-# - Mark Mielke <mark@mielke.cc>
-#
-
-package Text::Soundex;
-require 5.006;
-
-use Exporter ();
-use XSLoader ();
-
-use strict;
-
-use if $] > 5.016, 'deprecate';
-
-our $VERSION = '3.04';
-our @EXPORT_OK = qw(soundex soundex_unicode soundex_nara soundex_nara_unicode
- $soundex_nocode);
-our @EXPORT = qw(soundex soundex_nara $soundex_nocode);
-our @ISA = qw(Exporter);
-
-our $nocode;
-
-# Previous releases of Text::Soundex made $nocode available as $soundex_nocode.
-# For now, this part of the interface is exported and maintained.
-# In the feature, $soundex_nocode will be deprecated.
-*Text::Soundex::soundex_nocode = \$nocode;
-
-sub soundex_noxs
-{
- # Original Soundex algorithm
-
- my @results = map {
- my $code = uc($_);
- $code =~ tr/AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr//cd;
-
- if (length($code)) {
- my $firstchar = substr($code, 0, 1);
- $code =~ tr[AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr]
- [0000000000000000111111112222222222222222333344555566]s;
- ($code = substr($code, 1)) =~ tr/0//d;
- substr($firstchar . $code . '000', 0, 4);
- } else {
- $nocode;
- }
- } @_;
-
- wantarray ? @results : $results[0];
-}
-
-sub soundex_nara
-{
- # US census (NARA) algorithm.
-
- my @results = map {
- my $code = uc($_);
- $code =~ tr/AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr//cd;
-
- if (length($code)) {
- my $firstchar = substr($code, 0, 1);
- $code =~ tr[AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr]
- [0000990000009900111111112222222222222222333344555566]s;
- $code =~ s/(.)9\1/$1/gs;
- ($code = substr($code, 1)) =~ tr/09//d;
- substr($firstchar . $code . '000', 0, 4);
- } else {
- $nocode
- }
- } @_;
-
- wantarray ? @results : $results[0];
-}
-
-sub soundex_unicode
-{
- require Text::Unidecode unless defined &Text::Unidecode::unidecode;
- soundex(Text::Unidecode::unidecode(@_));
-}
-
-sub soundex_nara_unicode
-{
- require Text::Unidecode unless defined &Text::Unidecode::unidecode;
- soundex_nara(Text::Unidecode::unidecode(@_));
-}
-
-eval { XSLoader::load(__PACKAGE__, $VERSION) };
-
-if (defined(&soundex_xs)) {
- *soundex = \&soundex_xs;
-} else {
- *soundex = \&soundex_noxs;
- *soundex_xs = sub {
- require Carp;
- Carp::croak("XS implementation of Text::Soundex::soundex_xs() ".
- "could not be loaded");
- };
-}
-
-1;
-
-__END__
-
-# Implementation of the soundex algorithm.
-#
-# Some of this documention was written by Mike Stok.
-#
-# Examples:
-#
-# Euler, Ellery -> E460
-# Gauss, Ghosh -> G200
-# Hilbert, Heilbronn -> H416
-# Knuth, Kant -> K530
-# Lloyd, Ladd -> L300
-# Lukasiewicz, Lissajous -> L222
-#
-
-=head1 NAME
-
-Text::Soundex - Implementation of the soundex algorithm.
-
-=head1 SYNOPSIS
-
- use Text::Soundex;
-
- # Original algorithm.
- $code = soundex($name); # Get the soundex code for a name.
- @codes = soundex(@names); # Get the list of codes for a list of names.
-
- # American Soundex variant (NARA) - Used for US census data.
- $code = soundex_nara($name); # Get the soundex code for a name.
- @codes = soundex_nara(@names); # Get the list of codes for a list of names.
-
- # Redefine the value that soundex() will return if the input string
- # contains no identifiable sounds within it.
- $Text::Soundex::nocode = 'Z000';
-
-=head1 DESCRIPTION
-
-Soundex is a phonetic algorithm for indexing names by sound, as
-pronounced in English. The goal is for names with the same
-pronunciation to be encoded to the same representation so that they
-can be matched despite minor differences in spelling. Soundex is the
-most widely known of all phonetic algorithms and is often used
-(incorrectly) as a synonym for "phonetic algorithm". Improvements to
-Soundex are the basis for many modern phonetic algorithms. (Wikipedia,
-2007)
-
-This module implements the original soundex algorithm developed by
-Robert Russell and Margaret Odell, patented in 1918 and 1922, as well
-as a variation called "American Soundex" used for US census data, and
-current maintained by the National Archives and Records Administration
-(NARA).
-
-The soundex algorithm may be recognized from Donald Knuth's
-B<The Art of Computer Programming>. The algorithm described by
-Knuth is the NARA algorithm.
-
-The value returned for strings which have no soundex encoding is
-defined using C<$Text::Soundex::nocode>. The default value is C<undef>,
-however values such as C<'Z000'> are commonly used alternatives.
-
-For backward compatibility with older versions of this module the
-C<$Text::Soundex::nocode> is exported into the caller's namespace as
-C<$soundex_nocode>.
-
-In scalar context, C<soundex()> returns the soundex code of its first
-argument. In list context, a list is returned in which each element is the
-soundex code for the corresponding argument passed to C<soundex()>. For
-example, the following code assigns @codes the value C<('M200', 'S320')>:
-
- @codes = soundex qw(Mike Stok);
-
-To use C<Text::Soundex> to generate codes that can be used to search one
-of the publically available US Censuses, a variant of the soundex
-algorithm must be used:
-
- use Text::Soundex;
- $code = soundex_nara($name);
-
-An example of where these algorithm differ follows:
-
- use Text::Soundex;
- print soundex("Ashcraft"), "\n"; # prints: A226
- print soundex_nara("Ashcraft"), "\n"; # prints: A261
-
-=head1 EXAMPLES
-
-Donald Knuth's examples of names and the soundex codes they map to
-are listed below:
-
- Euler, Ellery -> E460
- Gauss, Ghosh -> G200
- Hilbert, Heilbronn -> H416
- Knuth, Kant -> K530
- Lloyd, Ladd -> L300
- Lukasiewicz, Lissajous -> L222
-
-so:
-
- $code = soundex 'Knuth'; # $code contains 'K530'
- @list = soundex qw(Lloyd Gauss); # @list contains 'L300', 'G200'
-
-=head1 LIMITATIONS
-
-As the soundex algorithm was originally used a B<long> time ago in the US
-it considers only the English alphabet and pronunciation. In particular,
-non-ASCII characters will be ignored. The recommended method of dealing
-with characters that have accents, or other unicode characters, is to use
-the Text::Unidecode module available from CPAN. Either use the module
-explicitly:
-
- use Text::Soundex;
- use Text::Unidecode;
-
- print soundex(unidecode("Fran\xE7ais")), "\n"; # Prints "F652\n"
-
-Or use the convenient wrapper routine:
-
- use Text::Soundex 'soundex_unicode';
-
- print soundex_unicode("Fran\xE7ais"), "\n"; # Prints "F652\n"
-
-Since the soundex algorithm maps a large space (strings of arbitrary
-length) onto a small space (single letter plus 3 digits) no inference
-can be made about the similarity of two strings which end up with the
-same soundex code. For example, both C<Hilbert> and C<Heilbronn> end
-up with a soundex code of C<H416>.
-
-=head1 MAINTAINER
-
-This module is currently maintain by Mark Mielke (C<mark@mielke.cc>).
-
-=head1 HISTORY
-
-Version 3 is a significant update to provide support for versions of
-Perl later than Perl 5.004. Specifically, the XS version of the
-soundex() subroutine understands strings that are encoded using UTF-8
-(unicode strings).
-
-Version 2 of this module was a re-write by Mark Mielke (C<mark@mielke.cc>)
-to improve the speed of the subroutines. The XS version of the soundex()
-subroutine was introduced in 2.00.
-
-Version 1 of this module was written by Mike Stok (C<mike@stok.co.uk>)
-and was included into the Perl core library set.
-
-Dave Carlsen (C<dcarlsen@csranet.com>) made the request for the NARA
-algorithm to be included. The NARA soundex page can be viewed at:
-C<http://www.nara.gov/genealogy/soundex/soundex.html>
-
-Ian Phillips (C<ian@pipex.net>) and Rich Pinder (C<rpinder@hsc.usc.edu>)
-supplied ideas and spotted mistakes for v1.x.
-
-=cut