1 | package Tie::SubstrHash;
|
---|
2 |
|
---|
3 | our $VERSION = '1.00';
|
---|
4 |
|
---|
5 | =head1 NAME
|
---|
6 |
|
---|
7 | Tie::SubstrHash - Fixed-table-size, fixed-key-length hashing
|
---|
8 |
|
---|
9 | =head1 SYNOPSIS
|
---|
10 |
|
---|
11 | require Tie::SubstrHash;
|
---|
12 |
|
---|
13 | tie %myhash, 'Tie::SubstrHash', $key_len, $value_len, $table_size;
|
---|
14 |
|
---|
15 | =head1 DESCRIPTION
|
---|
16 |
|
---|
17 | The B<Tie::SubstrHash> package provides a hash-table-like interface to
|
---|
18 | an array of determinate size, with constant key size and record size.
|
---|
19 |
|
---|
20 | Upon tying a new hash to this package, the developer must specify the
|
---|
21 | size of the keys that will be used, the size of the value fields that the
|
---|
22 | keys will index, and the size of the overall table (in terms of key-value
|
---|
23 | pairs, not size in hard memory). I<These values will not change for the
|
---|
24 | duration of the tied hash>. The newly-allocated hash table may now have
|
---|
25 | data stored and retrieved. Efforts to store more than C<$table_size>
|
---|
26 | elements will result in a fatal error, as will efforts to store a value
|
---|
27 | not exactly C<$value_len> characters in length, or reference through a
|
---|
28 | key not exactly C<$key_len> characters in length. While these constraints
|
---|
29 | may seem excessive, the result is a hash table using much less internal
|
---|
30 | memory than an equivalent freely-allocated hash table.
|
---|
31 |
|
---|
32 | =head1 CAVEATS
|
---|
33 |
|
---|
34 | Because the current implementation uses the table and key sizes for the
|
---|
35 | hashing algorithm, there is no means by which to dynamically change the
|
---|
36 | value of any of the initialization parameters.
|
---|
37 |
|
---|
38 | The hash does not support exists().
|
---|
39 |
|
---|
40 | =cut
|
---|
41 |
|
---|
42 | use Carp;
|
---|
43 |
|
---|
44 | sub TIEHASH {
|
---|
45 | my $pack = shift;
|
---|
46 | my ($klen, $vlen, $tsize) = @_;
|
---|
47 | my $rlen = 1 + $klen + $vlen;
|
---|
48 | $tsize = [$tsize,
|
---|
49 | findgteprime($tsize * 1.1)]; # Allow 10% empty.
|
---|
50 | local $self = bless ["\0", $klen, $vlen, $tsize, $rlen, 0, -1];
|
---|
51 | $$self[0] x= $rlen * $tsize->[1];
|
---|
52 | $self;
|
---|
53 | }
|
---|
54 |
|
---|
55 | sub CLEAR {
|
---|
56 | local($self) = @_;
|
---|
57 | $$self[0] = "\0" x ($$self[4] * $$self[3]->[1]);
|
---|
58 | $$self[5] = 0;
|
---|
59 | $$self[6] = -1;
|
---|
60 | }
|
---|
61 |
|
---|
62 | sub FETCH {
|
---|
63 | local($self,$key) = @_;
|
---|
64 | local($klen, $vlen, $tsize, $rlen) = @$self[1..4];
|
---|
65 | &hashkey;
|
---|
66 | for (;;) {
|
---|
67 | $offset = $hash * $rlen;
|
---|
68 | $record = substr($$self[0], $offset, $rlen);
|
---|
69 | if (ord($record) == 0) {
|
---|
70 | return undef;
|
---|
71 | }
|
---|
72 | elsif (ord($record) == 1) {
|
---|
73 | }
|
---|
74 | elsif (substr($record, 1, $klen) eq $key) {
|
---|
75 | return substr($record, 1+$klen, $vlen);
|
---|
76 | }
|
---|
77 | &rehash;
|
---|
78 | }
|
---|
79 | }
|
---|
80 |
|
---|
81 | sub STORE {
|
---|
82 | local($self,$key,$val) = @_;
|
---|
83 | local($klen, $vlen, $tsize, $rlen) = @$self[1..4];
|
---|
84 | croak("Table is full ($tsize->[0] elements)") if $$self[5] > $tsize->[0];
|
---|
85 | croak(qq/Value "$val" is not $vlen characters long/)
|
---|
86 | if length($val) != $vlen;
|
---|
87 | my $writeoffset;
|
---|
88 |
|
---|
89 | &hashkey;
|
---|
90 | for (;;) {
|
---|
91 | $offset = $hash * $rlen;
|
---|
92 | $record = substr($$self[0], $offset, $rlen);
|
---|
93 | if (ord($record) == 0) {
|
---|
94 | $record = "\2". $key . $val;
|
---|
95 | die "panic" unless length($record) == $rlen;
|
---|
96 | $writeoffset = $offset unless defined $writeoffset;
|
---|
97 | substr($$self[0], $writeoffset, $rlen) = $record;
|
---|
98 | ++$$self[5];
|
---|
99 | return;
|
---|
100 | }
|
---|
101 | elsif (ord($record) == 1) {
|
---|
102 | $writeoffset = $offset unless defined $writeoffset;
|
---|
103 | }
|
---|
104 | elsif (substr($record, 1, $klen) eq $key) {
|
---|
105 | $record = "\2". $key . $val;
|
---|
106 | die "panic" unless length($record) == $rlen;
|
---|
107 | substr($$self[0], $offset, $rlen) = $record;
|
---|
108 | return;
|
---|
109 | }
|
---|
110 | &rehash;
|
---|
111 | }
|
---|
112 | }
|
---|
113 |
|
---|
114 | sub DELETE {
|
---|
115 | local($self,$key) = @_;
|
---|
116 | local($klen, $vlen, $tsize, $rlen) = @$self[1..4];
|
---|
117 | &hashkey;
|
---|
118 | for (;;) {
|
---|
119 | $offset = $hash * $rlen;
|
---|
120 | $record = substr($$self[0], $offset, $rlen);
|
---|
121 | if (ord($record) == 0) {
|
---|
122 | return undef;
|
---|
123 | }
|
---|
124 | elsif (ord($record) == 1) {
|
---|
125 | }
|
---|
126 | elsif (substr($record, 1, $klen) eq $key) {
|
---|
127 | substr($$self[0], $offset, 1) = "\1";
|
---|
128 | return substr($record, 1+$klen, $vlen);
|
---|
129 | --$$self[5];
|
---|
130 | }
|
---|
131 | &rehash;
|
---|
132 | }
|
---|
133 | }
|
---|
134 |
|
---|
135 | sub FIRSTKEY {
|
---|
136 | local($self) = @_;
|
---|
137 | $$self[6] = -1;
|
---|
138 | &NEXTKEY;
|
---|
139 | }
|
---|
140 |
|
---|
141 | sub NEXTKEY {
|
---|
142 | local($self) = @_;
|
---|
143 | local($klen, $vlen, $tsize, $rlen, $entries, $iterix) = @$self[1..6];
|
---|
144 | for (++$iterix; $iterix < $tsize->[1]; ++$iterix) {
|
---|
145 | next unless substr($$self[0], $iterix * $rlen, 1) eq "\2";
|
---|
146 | $$self[6] = $iterix;
|
---|
147 | return substr($$self[0], $iterix * $rlen + 1, $klen);
|
---|
148 | }
|
---|
149 | $$self[6] = -1;
|
---|
150 | undef;
|
---|
151 | }
|
---|
152 |
|
---|
153 | sub EXISTS {
|
---|
154 | croak "Tie::SubstrHash does not support exists()";
|
---|
155 | }
|
---|
156 |
|
---|
157 | sub hashkey {
|
---|
158 | croak(qq/Key "$key" is not $klen characters long/)
|
---|
159 | if length($key) != $klen;
|
---|
160 | $hash = 2;
|
---|
161 | for (unpack('C*', $key)) {
|
---|
162 | $hash = $hash * 33 + $_;
|
---|
163 | &_hashwrap if $hash >= 1e13;
|
---|
164 | }
|
---|
165 | &_hashwrap if $hash >= $tsize->[1];
|
---|
166 | $hash = 1 unless $hash;
|
---|
167 | $hashbase = $hash;
|
---|
168 | }
|
---|
169 |
|
---|
170 | sub _hashwrap {
|
---|
171 | $hash -= int($hash / $tsize->[1]) * $tsize->[1];
|
---|
172 | }
|
---|
173 |
|
---|
174 | sub rehash {
|
---|
175 | $hash += $hashbase;
|
---|
176 | $hash -= $tsize->[1] if $hash >= $tsize->[1];
|
---|
177 | }
|
---|
178 |
|
---|
179 | # using POSIX::ceil() would be too heavy, and not all platforms have it.
|
---|
180 | sub ceil {
|
---|
181 | my $num = shift;
|
---|
182 | $num = int($num + 1) unless $num == int $num;
|
---|
183 | return $num;
|
---|
184 | }
|
---|
185 |
|
---|
186 | # See:
|
---|
187 | #
|
---|
188 | # http://www-groups.dcs.st-andrews.ac.uk/~history/HistTopics/Prime_numbers.html
|
---|
189 | #
|
---|
190 |
|
---|
191 | sub findgteprime { # find the smallest prime integer greater than or equal to
|
---|
192 | use integer;
|
---|
193 |
|
---|
194 | my $num = ceil(shift);
|
---|
195 | return 2 if $num <= 2;
|
---|
196 |
|
---|
197 | $num++ unless $num % 2;
|
---|
198 | my $i;
|
---|
199 | my $sqrtnum = int sqrt $num;
|
---|
200 | my $sqrtnumsquared = $sqrtnum * $sqrtnum;
|
---|
201 |
|
---|
202 | NUM:
|
---|
203 | for (;; $num += 2) {
|
---|
204 | if ($sqrtnumsquared < $num) {
|
---|
205 | $sqrtnum++;
|
---|
206 | $sqrtnumsquared = $sqrtnum * $sqrtnum;
|
---|
207 | }
|
---|
208 | for ($i = 3; $i <= $sqrtnum; $i += 2) {
|
---|
209 | next NUM unless $num % $i;
|
---|
210 | }
|
---|
211 | return $num;
|
---|
212 | }
|
---|
213 | }
|
---|
214 |
|
---|
215 | 1;
|
---|