1 |
|
---|
2 | package Memoize::Expire;
|
---|
3 | # require 5.00556;
|
---|
4 | use Carp;
|
---|
5 | $DEBUG = 0;
|
---|
6 | $VERSION = '1.00';
|
---|
7 |
|
---|
8 | # This package will implement expiration by prepending a fixed-length header
|
---|
9 | # to the font of the cached data. The format of the header will be:
|
---|
10 | # (4-byte number of last-access-time) (For LRU when I implement it)
|
---|
11 | # (4-byte expiration time: unsigned seconds-since-unix-epoch)
|
---|
12 | # (2-byte number-of-uses-before-expire)
|
---|
13 |
|
---|
14 | sub _header_fmt () { "N N n" }
|
---|
15 | sub _header_size () { length(_header_fmt) }
|
---|
16 |
|
---|
17 | # Usage: memoize func
|
---|
18 | # TIE => [Memoize::Expire, LIFETIME => sec, NUM_USES => n,
|
---|
19 | # TIE => [...] ]
|
---|
20 |
|
---|
21 | BEGIN {
|
---|
22 | eval {require Time::HiRes};
|
---|
23 | unless ($@) {
|
---|
24 | Time::HiRes->import('time');
|
---|
25 | }
|
---|
26 | }
|
---|
27 |
|
---|
28 | sub TIEHASH {
|
---|
29 | my ($package, %args) = @_;
|
---|
30 | my %cache;
|
---|
31 | if ($args{TIE}) {
|
---|
32 | my ($module, @opts) = @{$args{TIE}};
|
---|
33 | my $modulefile = $module . '.pm';
|
---|
34 | $modulefile =~ s{::}{/}g;
|
---|
35 | eval { require $modulefile };
|
---|
36 | if ($@) {
|
---|
37 | croak "Memoize::Expire: Couldn't load hash tie module `$module': $@; aborting";
|
---|
38 | }
|
---|
39 | my $rc = (tie %cache => $module, @opts);
|
---|
40 | unless ($rc) {
|
---|
41 | croak "Memoize::Expire: Couldn't tie hash to `$module': $@; aborting";
|
---|
42 | }
|
---|
43 | }
|
---|
44 | $args{LIFETIME} ||= 0;
|
---|
45 | $args{NUM_USES} ||= 0;
|
---|
46 | $args{C} = \%cache;
|
---|
47 | bless \%args => $package;
|
---|
48 | }
|
---|
49 |
|
---|
50 | sub STORE {
|
---|
51 | $DEBUG and print STDERR " >> Store $_[1] $_[2]\n";
|
---|
52 | my ($self, $key, $value) = @_;
|
---|
53 | my $expire_time = $self->{LIFETIME} > 0 ? $self->{LIFETIME} + time : 0;
|
---|
54 | # The call that results in a value to store into the cache is the
|
---|
55 | # first of the NUM_USES allowed calls.
|
---|
56 | my $header = _make_header(time, $expire_time, $self->{NUM_USES}-1);
|
---|
57 | $self->{C}{$key} = $header . $value;
|
---|
58 | $value;
|
---|
59 | }
|
---|
60 |
|
---|
61 | sub FETCH {
|
---|
62 | $DEBUG and print STDERR " >> Fetch cached value for $_[1]\n";
|
---|
63 | my ($data, $last_access, $expire_time, $num_uses_left) = _get_item($_[0]{C}{$_[1]});
|
---|
64 | $DEBUG and print STDERR " >> (ttl: ", ($expire_time-time()), ", nuses: $num_uses_left)\n";
|
---|
65 | $num_uses_left--;
|
---|
66 | $last_access = time;
|
---|
67 | _set_header(@_, $data, $last_access, $expire_time, $num_uses_left);
|
---|
68 | $data;
|
---|
69 | }
|
---|
70 |
|
---|
71 | sub EXISTS {
|
---|
72 | $DEBUG and print STDERR " >> Exists $_[1]\n";
|
---|
73 | unless (exists $_[0]{C}{$_[1]}) {
|
---|
74 | $DEBUG and print STDERR " Not in underlying hash at all.\n";
|
---|
75 | return 0;
|
---|
76 | }
|
---|
77 | my $item = $_[0]{C}{$_[1]};
|
---|
78 | my ($last_access, $expire_time, $num_uses_left) = _get_header($item);
|
---|
79 | my $ttl = $expire_time - time;
|
---|
80 | if ($DEBUG) {
|
---|
81 | $_[0]{LIFETIME} and print STDERR " Time to live for this item: $ttl\n";
|
---|
82 | $_[0]{NUM_USES} and print STDERR " Uses remaining: $num_uses_left\n";
|
---|
83 | }
|
---|
84 | if ( (! $_[0]{LIFETIME} || $expire_time > time)
|
---|
85 | && (! $_[0]{NUM_USES} || $num_uses_left > 0 )) {
|
---|
86 | $DEBUG and print STDERR " (Still good)\n";
|
---|
87 | return 1;
|
---|
88 | } else {
|
---|
89 | $DEBUG and print STDERR " (Expired)\n";
|
---|
90 | return 0;
|
---|
91 | }
|
---|
92 | }
|
---|
93 |
|
---|
94 | # Arguments: last access time, expire time, number of uses remaining
|
---|
95 | sub _make_header {
|
---|
96 | pack "N N n", @_;
|
---|
97 | }
|
---|
98 |
|
---|
99 | sub _strip_header {
|
---|
100 | substr($_[0], 10);
|
---|
101 | }
|
---|
102 |
|
---|
103 | # Arguments: last access time, expire time, number of uses remaining
|
---|
104 | sub _set_header {
|
---|
105 | my ($self, $key, $data, @header) = @_;
|
---|
106 | $self->{C}{$key} = _make_header(@header) . $data;
|
---|
107 | }
|
---|
108 |
|
---|
109 | sub _get_item {
|
---|
110 | my $data = substr($_[0], 10);
|
---|
111 | my @header = unpack "N N n", substr($_[0], 0, 10);
|
---|
112 | # print STDERR " >> _get_item: $data => $data @header\n";
|
---|
113 | ($data, @header);
|
---|
114 | }
|
---|
115 |
|
---|
116 | # Return last access time, expire time, number of uses remaining
|
---|
117 | sub _get_header {
|
---|
118 | unpack "N N n", substr($_[0], 0, 10);
|
---|
119 | }
|
---|
120 |
|
---|
121 | 1;
|
---|
122 |
|
---|
123 | =head1 NAME
|
---|
124 |
|
---|
125 | Memoize::Expire - Plug-in module for automatic expiration of memoized values
|
---|
126 |
|
---|
127 | =head1 SYNOPSIS
|
---|
128 |
|
---|
129 | use Memoize;
|
---|
130 | use Memoize::Expire;
|
---|
131 | tie my %cache => 'Memoize::Expire',
|
---|
132 | LIFETIME => $lifetime, # In seconds
|
---|
133 | NUM_USES => $n_uses;
|
---|
134 |
|
---|
135 | memoize 'function', SCALAR_CACHE => [HASH => \%cache ];
|
---|
136 |
|
---|
137 | =head1 DESCRIPTION
|
---|
138 |
|
---|
139 | Memoize::Expire is a plug-in module for Memoize. It allows the cached
|
---|
140 | values for memoized functions to expire automatically. This manual
|
---|
141 | assumes you are already familiar with the Memoize module. If not, you
|
---|
142 | should study that manual carefully first, paying particular attention
|
---|
143 | to the HASH feature.
|
---|
144 |
|
---|
145 | Memoize::Expire is a layer of software that you can insert in between
|
---|
146 | Memoize itself and whatever underlying package implements the cache.
|
---|
147 | The layer presents a hash variable whose values expire whenever they
|
---|
148 | get too old, have been used too often, or both. You tell C<Memoize> to
|
---|
149 | use this forgetful hash as its cache instead of the default, which is
|
---|
150 | an ordinary hash.
|
---|
151 |
|
---|
152 | To specify a real-time timeout, supply the C<LIFETIME> option with a
|
---|
153 | numeric value. Cached data will expire after this many seconds, and
|
---|
154 | will be looked up afresh when it expires. When a data item is looked
|
---|
155 | up afresh, its lifetime is reset.
|
---|
156 |
|
---|
157 | If you specify C<NUM_USES> with an argument of I<n>, then each cached
|
---|
158 | data item will be discarded and looked up afresh after the I<n>th time
|
---|
159 | you access it. When a data item is looked up afresh, its number of
|
---|
160 | uses is reset.
|
---|
161 |
|
---|
162 | If you specify both arguments, data will be discarded from the cache
|
---|
163 | when either expiration condition holds.
|
---|
164 |
|
---|
165 | Memoize::Expire uses a real hash internally to store the cached data.
|
---|
166 | You can use the C<HASH> option to Memoize::Expire to supply a tied
|
---|
167 | hash in place of the ordinary hash that Memoize::Expire will normally
|
---|
168 | use. You can use this feature to add Memoize::Expire as a layer in
|
---|
169 | between a persistent disk hash and Memoize. If you do this, you get a
|
---|
170 | persistent disk cache whose entries expire automatically. For
|
---|
171 | example:
|
---|
172 |
|
---|
173 | # Memoize
|
---|
174 | # |
|
---|
175 | # Memoize::Expire enforces data expiration policy
|
---|
176 | # |
|
---|
177 | # DB_File implements persistence of data in a disk file
|
---|
178 | # |
|
---|
179 | # Disk file
|
---|
180 |
|
---|
181 | use Memoize;
|
---|
182 | use Memoize::Expire;
|
---|
183 | use DB_File;
|
---|
184 |
|
---|
185 | # Set up persistence
|
---|
186 | tie my %disk_cache => 'DB_File', $filename, O_CREAT|O_RDWR, 0666];
|
---|
187 |
|
---|
188 | # Set up expiration policy, supplying persistent hash as a target
|
---|
189 | tie my %cache => 'Memoize::Expire',
|
---|
190 | LIFETIME => $lifetime, # In seconds
|
---|
191 | NUM_USES => $n_uses,
|
---|
192 | HASH => \%disk_cache;
|
---|
193 |
|
---|
194 | # Set up memoization, supplying expiring persistent hash for cache
|
---|
195 | memoize 'function', SCALAR_CACHE => [ HASH => \%cache ];
|
---|
196 |
|
---|
197 | =head1 INTERFACE
|
---|
198 |
|
---|
199 | There is nothing special about Memoize::Expire. It is just an
|
---|
200 | example. If you don't like the policy that it implements, you are
|
---|
201 | free to write your own expiration policy module that implements
|
---|
202 | whatever policy you desire. Here is how to do that. Let us suppose
|
---|
203 | that your module will be named MyExpirePolicy.
|
---|
204 |
|
---|
205 | Short summary: You need to create a package that defines four methods:
|
---|
206 |
|
---|
207 | =over 4
|
---|
208 |
|
---|
209 | =item
|
---|
210 | TIEHASH
|
---|
211 |
|
---|
212 | Construct and return cache object.
|
---|
213 |
|
---|
214 | =item
|
---|
215 | EXISTS
|
---|
216 |
|
---|
217 | Given a function argument, is the corresponding function value in the
|
---|
218 | cache, and if so, is it fresh enough to use?
|
---|
219 |
|
---|
220 | =item
|
---|
221 | FETCH
|
---|
222 |
|
---|
223 | Given a function argument, look up the corresponding function value in
|
---|
224 | the cache and return it.
|
---|
225 |
|
---|
226 | =item
|
---|
227 | STORE
|
---|
228 |
|
---|
229 | Given a function argument and the corresponding function value, store
|
---|
230 | them into the cache.
|
---|
231 |
|
---|
232 | =item
|
---|
233 | CLEAR
|
---|
234 |
|
---|
235 | (Optional.) Flush the cache completely.
|
---|
236 |
|
---|
237 | =back
|
---|
238 |
|
---|
239 | The user who wants the memoization cache to be expired according to
|
---|
240 | your policy will say so by writing
|
---|
241 |
|
---|
242 | tie my %cache => 'MyExpirePolicy', args...;
|
---|
243 | memoize 'function', SCALAR_CACHE => [HASH => \%cache];
|
---|
244 |
|
---|
245 | This will invoke C<< MyExpirePolicy->TIEHASH(args) >>.
|
---|
246 | MyExpirePolicy::TIEHASH should do whatever is appropriate to set up
|
---|
247 | the cache, and it should return the cache object to the caller.
|
---|
248 |
|
---|
249 | For example, MyExpirePolicy::TIEHASH might create an object that
|
---|
250 | contains a regular Perl hash (which it will to store the cached
|
---|
251 | values) and some extra information about the arguments and how old the
|
---|
252 | data is and things like that. Let us call this object `C'.
|
---|
253 |
|
---|
254 | When Memoize needs to check to see if an entry is in the cache
|
---|
255 | already, it will invoke C<< C->EXISTS(key) >>. C<key> is the normalized
|
---|
256 | function argument. MyExpirePolicy::EXISTS should return 0 if the key
|
---|
257 | is not in the cache, or if it has expired, and 1 if an unexpired value
|
---|
258 | is in the cache. It should I<not> return C<undef>, because there is a
|
---|
259 | bug in some versions of Perl that will cause a spurious FETCH if the
|
---|
260 | EXISTS method returns C<undef>.
|
---|
261 |
|
---|
262 | If your EXISTS function returns true, Memoize will try to fetch the
|
---|
263 | cached value by invoking C<< C->FETCH(key) >>. MyExpirePolicy::FETCH should
|
---|
264 | return the cached value. Otherwise, Memoize will call the memoized
|
---|
265 | function to compute the appropriate value, and will store it into the
|
---|
266 | cache by calling C<< C->STORE(key, value) >>.
|
---|
267 |
|
---|
268 | Here is a very brief example of a policy module that expires each
|
---|
269 | cache item after ten seconds.
|
---|
270 |
|
---|
271 | package Memoize::TenSecondExpire;
|
---|
272 |
|
---|
273 | sub TIEHASH {
|
---|
274 | my ($package, %args) = @_;
|
---|
275 | my $cache = $args{HASH} || {};
|
---|
276 | bless $cache => $package;
|
---|
277 | }
|
---|
278 |
|
---|
279 | sub EXISTS {
|
---|
280 | my ($cache, $key) = @_;
|
---|
281 | if (exists $cache->{$key} &&
|
---|
282 | $cache->{$key}{EXPIRE_TIME} > time) {
|
---|
283 | return 1
|
---|
284 | } else {
|
---|
285 | return 0; # Do NOT return `undef' here.
|
---|
286 | }
|
---|
287 | }
|
---|
288 |
|
---|
289 | sub FETCH {
|
---|
290 | my ($cache, $key) = @_;
|
---|
291 | return $cache->{$key}{VALUE};
|
---|
292 | }
|
---|
293 |
|
---|
294 | sub STORE {
|
---|
295 | my ($cache, $key, $newvalue) = @_;
|
---|
296 | $cache->{$key}{VALUE} = $newvalue;
|
---|
297 | $cache->{$key}{EXPIRE_TIME} = time + 10;
|
---|
298 | }
|
---|
299 |
|
---|
300 | To use this expiration policy, the user would say
|
---|
301 |
|
---|
302 | use Memoize;
|
---|
303 | tie my %cache10sec => 'Memoize::TenSecondExpire';
|
---|
304 | memoize 'function', SCALAR_CACHE => [HASH => \%cache10sec];
|
---|
305 |
|
---|
306 | Memoize would then call C<function> whenever a cached value was
|
---|
307 | entirely absent or was older than ten seconds.
|
---|
308 |
|
---|
309 | You should always support a C<HASH> argument to C<TIEHASH> that ties
|
---|
310 | the underlying cache so that the user can specify that the cache is
|
---|
311 | also persistent or that it has some other interesting semantics. The
|
---|
312 | example above demonstrates how to do this, as does C<Memoize::Expire>.
|
---|
313 |
|
---|
314 | =head1 ALTERNATIVES
|
---|
315 |
|
---|
316 | Brent Powers has a C<Memoize::ExpireLRU> module that was designed to
|
---|
317 | work with Memoize and provides expiration of least-recently-used data.
|
---|
318 | The cache is held at a fixed number of entries, and when new data
|
---|
319 | comes in, the least-recently used data is expired. See
|
---|
320 | L<http://search.cpan.org/search?mode=module&query=ExpireLRU>.
|
---|
321 |
|
---|
322 | Joshua Chamas's Tie::Cache module may be useful as an expiration
|
---|
323 | manager. (If you try this, let me know how it works out.)
|
---|
324 |
|
---|
325 | If you develop any useful expiration managers that you think should be
|
---|
326 | distributed with Memoize, please let me know.
|
---|
327 |
|
---|
328 | =head1 CAVEATS
|
---|
329 |
|
---|
330 | This module is experimental, and may contain bugs. Please report bugs
|
---|
331 | to the address below.
|
---|
332 |
|
---|
333 | Number-of-uses is stored as a 16-bit unsigned integer, so can't exceed
|
---|
334 | 65535.
|
---|
335 |
|
---|
336 | Because of clock granularity, expiration times may occur up to one
|
---|
337 | second sooner than you expect. For example, suppose you store a value
|
---|
338 | with a lifetime of ten seconds, and you store it at 12:00:00.998 on a
|
---|
339 | certain day. Memoize will look at the clock and see 12:00:00. Then
|
---|
340 | 9.01 seconds later, at 12:00:10.008 you try to read it back. Memoize
|
---|
341 | will look at the clock and see 12:00:10 and conclude that the value
|
---|
342 | has expired. This will probably not occur if you have
|
---|
343 | C<Time::HiRes> installed.
|
---|
344 |
|
---|
345 | =head1 AUTHOR
|
---|
346 |
|
---|
347 | Mark-Jason Dominus ([email protected])
|
---|
348 |
|
---|
349 | Mike Cariaso provided valuable insight into the best way to solve this
|
---|
350 | problem.
|
---|
351 |
|
---|
352 | =head1 SEE ALSO
|
---|
353 |
|
---|
354 | perl(1)
|
---|
355 |
|
---|
356 | The Memoize man page.
|
---|
357 |
|
---|
358 | http://www.plover.com/~mjd/perl/Memoize/ (for news and updates)
|
---|
359 |
|
---|
360 | I maintain a mailing list on which I occasionally announce new
|
---|
361 | versions of Memoize. The list is for announcements only, not
|
---|
362 | discussion. To join, send an empty message to
|
---|
363 | [email protected].
|
---|
364 |
|
---|
365 | =cut
|
---|