diff options
author | Florian Ragwitz <rafl@debian.org> | 2011-07-17 13:57:18 +0200 |
---|---|---|
committer | Florian Ragwitz <rafl@debian.org> | 2011-07-17 14:23:16 +0200 |
commit | f80b753a916872bf199bf581c08f65d7edd9edfe (patch) | |
tree | e0efe58f5beeb1a63dcb44be39646a2123b03375 /lib/Search | |
parent | e1f74b1ab09f3774466834ae04ca0dc7ea854e43 (diff) | |
download | perl-f80b753a916872bf199bf581c08f65d7edd9edfe.tar.gz |
Dual-life Search::Dict
Diffstat (limited to 'lib/Search')
-rw-r--r-- | lib/Search/Dict.pm | 108 | ||||
-rw-r--r-- | lib/Search/Dict.t | 87 |
2 files changed, 0 insertions, 195 deletions
diff --git a/lib/Search/Dict.pm b/lib/Search/Dict.pm deleted file mode 100644 index cbbaf99a50..0000000000 --- a/lib/Search/Dict.pm +++ /dev/null @@ -1,108 +0,0 @@ -package Search::Dict; -require 5.000; -require Exporter; - -use strict; - -our $VERSION = '1.03'; -our @ISA = qw(Exporter); -our @EXPORT = qw(look); - -=head1 NAME - -Search::Dict, look - search for key in dictionary file - -=head1 SYNOPSIS - - use Search::Dict; - look *FILEHANDLE, $key, $dict, $fold; - - use Search::Dict; - look *FILEHANDLE, $params; - -=head1 DESCRIPTION - -Sets file position in FILEHANDLE to be first line greater than or equal -(stringwise) to I<$key>. Returns the new file position, or -1 if an error -occurs. - -The flags specify dictionary order and case folding: - -If I<$dict> is true, search by dictionary order (ignore anything but word -characters and whitespace). The default is honour all characters. - -If I<$fold> is true, ignore case. The default is to honour case. - -If there are only three arguments and the third argument is a hash -reference, the keys of that hash can have values C<dict>, C<fold>, and -C<comp> or C<xfrm> (see below), and their corresponding values will be -used as the parameters. - -If a comparison subroutine (comp) is defined, it must return less than zero, -zero, or greater than zero, if the first comparand is less than, -equal, or greater than the second comparand. - -If a transformation subroutine (xfrm) is defined, its value is used to -transform the lines read from the filehandle before their comparison. - -=cut - -sub look { - my($fh,$key,$dict,$fold) = @_; - my ($comp, $xfrm); - if (@_ == 3 && ref $dict eq 'HASH') { - my $params = $dict; - $dict = 0; - $dict = $params->{dict} if exists $params->{dict}; - $fold = $params->{fold} if exists $params->{fold}; - $comp = $params->{comp} if exists $params->{comp}; - $xfrm = $params->{xfrm} if exists $params->{xfrm}; - } - $comp = sub { $_[0] cmp $_[1] } unless defined $comp; - local($_); - my(@stat) = stat($fh) - or return -1; - my($size, $blksize) = @stat[7,11]; - $blksize ||= 8192; - $key =~ s/[^\w\s]//g if $dict; - $key = lc $key if $fold; - # find the right block - my($min, $max) = (0, int($size / $blksize)); - my $mid; - while ($max - $min > 1) { - $mid = int(($max + $min) / 2); - seek($fh, $mid * $blksize, 0) - or return -1; - <$fh> if $mid; # probably a partial line - $_ = <$fh>; - $_ = $xfrm->($_) if defined $xfrm; - chomp; - s/[^\w\s]//g if $dict; - $_ = lc $_ if $fold; - if (defined($_) && $comp->($_, $key) < 0) { - $min = $mid; - } - else { - $max = $mid; - } - } - # find the right line - $min *= $blksize; - seek($fh,$min,0) - or return -1; - <$fh> if $min; - for (;;) { - $min = tell($fh); - defined($_ = <$fh>) - or last; - $_ = $xfrm->($_) if defined $xfrm; - chomp; - s/[^\w\s]//g if $dict; - $_ = lc $_ if $fold; - last if $comp->($_, $key) >= 0; - } - seek($fh,$min,0); - $min; -} - -1; diff --git a/lib/Search/Dict.t b/lib/Search/Dict.t deleted file mode 100644 index c36fdb8c34..0000000000 --- a/lib/Search/Dict.t +++ /dev/null @@ -1,87 +0,0 @@ -#!./perl - -BEGIN { - chdir 't' if -d 't'; - @INC = '../lib'; -} - -print "1..4\n"; - -$DICT = <<EOT; -Aarhus -Aaron -Ababa -aback -abaft -abandon -abandoned -abandoning -abandonment -abandons -abase -abased -abasement -abasements -abases -abash -abashed -abashes -abashing -abasing -abate -abated -abatement -abatements -abater -abates -abating -Abba -EOT - -use Search::Dict; - -open(DICT, "+>dict-$$") or die "Can't create dict-$$: $!"; -binmode DICT; # To make length expected one. -print DICT $DICT; - -my $pos = look *DICT, "Ababa"; -chomp($word = <DICT>); -print "not " if $pos < 0 || $word ne "Ababa"; -print "ok 1\n"; - -if (ord('a') > ord('A') ) { # ASCII - - $pos = look *DICT, "foo"; - chomp($word = <DICT>); - - print "not " if $pos != length($DICT); # will search to end of file - print "ok 2\n"; - - my $pos = look *DICT, "abash"; - chomp($word = <DICT>); - print "not " if $pos < 0 || $word ne "abash"; - print "ok 3\n"; - -} -else { # EBCDIC systems e.g. os390 - - $pos = look *DICT, "FOO"; - chomp($word = <DICT>); - - print "not " if $pos != length($DICT); # will search to end of file - print "ok 2\n"; - - my $pos = look *DICT, "Abba"; - chomp($word = <DICT>); - print "not " if $pos < 0 || $word ne "Abba"; - print "ok 3\n"; -} - -$pos = look *DICT, "aarhus", 1, 1; -chomp($word = <DICT>); - -print "not " if $pos < 0 || $word ne "Aarhus"; -print "ok 4\n"; - -close DICT or die "cannot close"; -unlink "dict-$$"; |