# -*- perl -*-
# (c) Copyright 1998-2007 by Mark Mielke
#
# Freedom to use these sources for whatever you want, as long as credit
# is given where credit is due, is hereby granted. You may make modifications
# where you see fit but leave this copyright somewhere visible. As well, try
# to initial any changes you make so that if I like the changes I can
# incorporate them into later versions.
#
# - Mark Mielke
#
package Text::Soundex;
require 5.006;
use Exporter ();
use XSLoader ();
use strict;
our $VERSION = '3.03';
our @EXPORT_OK = qw(soundex soundex_unicode soundex_nara soundex_nara_unicode
$soundex_nocode);
our @EXPORT = qw(soundex soundex_nara $soundex_nocode);
our @ISA = qw(Exporter);
our $nocode;
# Previous releases of Text::Soundex made $nocode available as $soundex_nocode.
# For now, this part of the interface is exported and maintained.
# In the feature, $soundex_nocode will be deprecated.
*Text::Soundex::soundex_nocode = \$nocode;
sub soundex_noxs
{
# Original Soundex algorithm
my @results = map {
my $code = uc($_);
$code =~ tr/AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr//cd;
if (length($code)) {
my $firstchar = substr($code, 0, 1);
$code =~ tr[AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr]
[0000000000000000111111112222222222222222333344555566]s;
($code = substr($code, 1)) =~ tr/0//d;
substr($firstchar . $code . '000', 0, 4);
} else {
$nocode;
}
} @_;
wantarray ? @results : $results[0];
}
sub soundex_nara
{
# US census (NARA) algorithm.
my @results = map {
my $code = uc($_);
$code =~ tr/AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr//cd;
if (length($code)) {
my $firstchar = substr($code, 0, 1);
$code =~ tr[AaEeHhIiOoUuWwYyBbFfPpVvCcGgJjKkQqSsXxZzDdTtLlMmNnRr]
[0000990000009900111111112222222222222222333344555566]s;
$code =~ s/(.)9\1/$1/gs;
($code = substr($code, 1)) =~ tr/09//d;
substr($firstchar . $code . '000', 0, 4);
} else {
$nocode
}
} @_;
wantarray ? @results : $results[0];
}
sub soundex_unicode
{
require Text::Unidecode unless defined &Text::Unidecode::unidecode;
soundex(Text::Unidecode::unidecode(@_));
}
sub soundex_nara_unicode
{
require Text::Unidecode unless defined &Text::Unidecode::unidecode;
soundex_nara(Text::Unidecode::unidecode(@_));
}
eval { XSLoader::load(__PACKAGE__, $VERSION) };
if (defined(&soundex_xs)) {
*soundex = \&soundex_xs;
} else {
*soundex = \&soundex_noxs;
*soundex_xs = sub {
require Carp;
Carp::croak("XS implementation of Text::Soundex::soundex_xs() ".
"could not be loaded");
};
}
1;
__END__
# Implementation of the soundex algorithm.
#
# Some of this documention was written by Mike Stok.
#
# Examples:
#
# Euler, Ellery -> E460
# Gauss, Ghosh -> G200
# Hilbert, Heilbronn -> H416
# Knuth, Kant -> K530
# Lloyd, Ladd -> L300
# Lukasiewicz, Lissajous -> L222
#
=head1 NAME
Text::Soundex - Implementation of the soundex algorithm.
=head1 SYNOPSIS
use Text::Soundex;
# Original algorithm.
$code = soundex($name); # Get the soundex code for a name.
@codes = soundex(@names); # Get the list of codes for a list of names.
# American Soundex variant (NARA) - Used for US census data.
$code = soundex_nara($name); # Get the soundex code for a name.
@codes = soundex_nara(@names); # Get the list of codes for a list of names.
# Redefine the value that soundex() will return if the input string
# contains no identifiable sounds within it.
$Text::Soundex::nocode = 'Z000';
=head1 DESCRIPTION
Soundex is a phonetic algorithm for indexing names by sound, as
pronounced in English. The goal is for names with the same
pronunciation to be encoded to the same representation so that they
can be matched despite minor differences in spelling. Soundex is the
most widely known of all phonetic algorithms and is often used
(incorrectly) as a synonym for "phonetic algorithm". Improvements to
Soundex are the basis for many modern phonetic algorithms. (Wikipedia,
2007)
This module implements the original soundex algorithm developed by
Robert Russell and Margaret Odell, patented in 1918 and 1922, as well
as a variation called "American Soundex" used for US census data, and
current maintained by the National Archives and Records Administration
(NARA).
The soundex algorithm may be recognized from Donald Knuth's
B