1 | package Search::Dict;
|
---|
2 | require 5.000;
|
---|
3 | require Exporter;
|
---|
4 |
|
---|
5 | use strict;
|
---|
6 |
|
---|
7 | our $VERSION = '1.02';
|
---|
8 | our @ISA = qw(Exporter);
|
---|
9 | our @EXPORT = qw(look);
|
---|
10 |
|
---|
11 | =head1 NAME
|
---|
12 |
|
---|
13 | Search::Dict, look - search for key in dictionary file
|
---|
14 |
|
---|
15 | =head1 SYNOPSIS
|
---|
16 |
|
---|
17 | use Search::Dict;
|
---|
18 | look *FILEHANDLE, $key, $dict, $fold;
|
---|
19 |
|
---|
20 | use Search::Dict;
|
---|
21 | look *FILEHANDLE, $params;
|
---|
22 |
|
---|
23 | =head1 DESCRIPTION
|
---|
24 |
|
---|
25 | Sets file position in FILEHANDLE to be first line greater than or equal
|
---|
26 | (stringwise) to I<$key>. Returns the new file position, or -1 if an error
|
---|
27 | occurs.
|
---|
28 |
|
---|
29 | The flags specify dictionary order and case folding:
|
---|
30 |
|
---|
31 | If I<$dict> is true, search by dictionary order (ignore anything but word
|
---|
32 | characters and whitespace). The default is honour all characters.
|
---|
33 |
|
---|
34 | If I<$fold> is true, ignore case. The default is to honour case.
|
---|
35 |
|
---|
36 | If there are only three arguments and the third argument is a hash
|
---|
37 | reference, the keys of that hash can have values C<dict>, C<fold>, and
|
---|
38 | C<comp> or C<xfrm> (see below), and their correponding values will be
|
---|
39 | used as the parameters.
|
---|
40 |
|
---|
41 | If a comparison subroutine (comp) is defined, it must return less than zero,
|
---|
42 | zero, or greater than zero, if the first comparand is less than,
|
---|
43 | equal, or greater than the second comparand.
|
---|
44 |
|
---|
45 | If a transformation subroutine (xfrm) is defined, its value is used to
|
---|
46 | transform the lines read from the filehandle before their comparison.
|
---|
47 |
|
---|
48 | =cut
|
---|
49 |
|
---|
50 | sub look {
|
---|
51 | my($fh,$key,$dict,$fold) = @_;
|
---|
52 | my ($comp, $xfrm);
|
---|
53 | if (@_ == 3 && ref $dict eq 'HASH') {
|
---|
54 | my $params = $dict;
|
---|
55 | $dict = 0;
|
---|
56 | $dict = $params->{dict} if exists $params->{dict};
|
---|
57 | $fold = $params->{fold} if exists $params->{fold};
|
---|
58 | $comp = $params->{comp} if exists $params->{comp};
|
---|
59 | $xfrm = $params->{xfrm} if exists $params->{xfrm};
|
---|
60 | }
|
---|
61 | $comp = sub { $_[0] cmp $_[1] } unless defined $comp;
|
---|
62 | local($_);
|
---|
63 | my(@stat) = stat($fh)
|
---|
64 | or return -1;
|
---|
65 | my($size, $blksize) = @stat[7,11];
|
---|
66 | $blksize ||= 8192;
|
---|
67 | $key =~ s/[^\w\s]//g if $dict;
|
---|
68 | $key = lc $key if $fold;
|
---|
69 | # find the right block
|
---|
70 | my($min, $max) = (0, int($size / $blksize));
|
---|
71 | my $mid;
|
---|
72 | while ($max - $min > 1) {
|
---|
73 | $mid = int(($max + $min) / 2);
|
---|
74 | seek($fh, $mid * $blksize, 0)
|
---|
75 | or return -1;
|
---|
76 | <$fh> if $mid; # probably a partial line
|
---|
77 | $_ = <$fh>;
|
---|
78 | $_ = $xfrm->($_) if defined $xfrm;
|
---|
79 | chomp;
|
---|
80 | s/[^\w\s]//g if $dict;
|
---|
81 | $_ = lc $_ if $fold;
|
---|
82 | if (defined($_) && $comp->($_, $key) < 0) {
|
---|
83 | $min = $mid;
|
---|
84 | }
|
---|
85 | else {
|
---|
86 | $max = $mid;
|
---|
87 | }
|
---|
88 | }
|
---|
89 | # find the right line
|
---|
90 | $min *= $blksize;
|
---|
91 | seek($fh,$min,0)
|
---|
92 | or return -1;
|
---|
93 | <$fh> if $min;
|
---|
94 | for (;;) {
|
---|
95 | $min = tell($fh);
|
---|
96 | defined($_ = <$fh>)
|
---|
97 | or last;
|
---|
98 | $_ = $xfrm->($_) if defined $xfrm;
|
---|
99 | chomp;
|
---|
100 | s/[^\w\s]//g if $dict;
|
---|
101 | $_ = lc $_ if $fold;
|
---|
102 | last if $comp->($_, $key) >= 0;
|
---|
103 | }
|
---|
104 | seek($fh,$min,0);
|
---|
105 | $min;
|
---|
106 | }
|
---|
107 |
|
---|
108 | 1;
|
---|