source: gsdl/trunk/perllib/cpan/File/RandomAccess.pm@ 16844

Last change on this file since 16844 was 16844, checked in by davidb, 16 years ago

File::RandomAccess added to support Image::ExifTool. Both are pure Perl modules

File size: 11.3 KB
Line 
1#------------------------------------------------------------------------------
2# File: RandomAccess.pm
3#
4# Description: Buffer to support random access reading of sequential file
5#
6# Revisions: 02/11/04 - P. Harvey Created
7# 02/20/04 - P. Harvey Added flag to disable SeekTest in new()
8# 11/18/04 - P. Harvey Fixed bug with seek relative to end of file
9# 01/02/05 - P. Harvey Added DEBUG code
10# 01/09/06 - P. Harvey Fixed bug in ReadLine() when using
11# multi-character EOL sequences
12# 02/20/06 - P. Harvey Fixed bug where seek past end of file could
13# generate "substr outside string" warning
14# 06/10/06 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
15# 11/23/06 - P. Harvey Limit reads to < 0x80000000 bytes
16#
17# Notes: Calls the normal file i/o routines unless SeekTest() fails, in
18# which case the file is buffered in memory to allow random access.
19# SeekTest() is called automatically when the object is created
20# unless specified.
21#
22# May also be used for string i/o (just pass a scalar reference)
23#
24# Legal: Copyright (c) 2004-2006 Phil Harvey (phil at owl.phy.queensu.ca)
25# This library is free software; you can redistribute it and/or
26# modify it under the same terms as Perl itself.
27#------------------------------------------------------------------------------
28
29package File::RandomAccess;
30
31use strict;
32require 5.002;
33require Exporter;
34
35use vars qw($VERSION @ISA @EXPORT_OK);
36$VERSION = '1.07';
37@ISA = qw(Exporter);
38
39# constants
40my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
41my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
42
43#------------------------------------------------------------------------------
44# Create new RandomAccess object
45# Inputs: 0) reference to RandomAccess object or RandomAccess class name
46# 1) file reference or scalar reference
47# 2) flag set if file is already random access (disables automatic SeekTest)
48sub new($$;$)
49{
50 my ($that, $filePt, $isRandom) = @_;
51 my $class = ref($that) || $that;
52 my $self;
53
54 if (ref $filePt eq 'SCALAR') {
55 # string i/o
56 $self = {
57 BUFF_PT => $filePt,
58 POS => 0,
59 LEN => length($$filePt),
60 TESTED => -1,
61 };
62 bless $self, $class;
63 } else {
64 # file i/o
65 my $buff = '';
66 $self = {
67 FILE_PT => $filePt, # file pointer
68 BUFF_PT => \$buff, # reference to file data
69 POS => 0, # current position in file
70 LEN => 0, # data length
71 TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
72 };
73 bless $self, $class;
74 $self->SeekTest() unless $isRandom;
75 }
76 return $self;
77}
78
79#------------------------------------------------------------------------------
80# Enable DEBUG code
81# Inputs: 0) reference to RandomAccess object
82sub Debug($)
83{
84 my $self = shift;
85 $self->{DEBUG} = { };
86}
87
88#------------------------------------------------------------------------------
89# Perform seek test and turn on buffering if necessary
90# Inputs: 0) reference to RandomAccess object
91# Returns: 1 if seek test passed (ie. no buffering required)
92# Notes: Must be done before any other i/o
93sub SeekTest($)
94{
95 my $self = shift;
96 unless ($self->{TESTED}) {
97 my $fp = $self->{FILE_PT};
98 if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
99 $self->{TESTED} = 1; # test passed
100 } else {
101 $self->{TESTED} = -1; # test failed (requires buffering)
102 }
103 }
104 return $self->{TESTED} == 1 ? 1 : 0;
105}
106
107#------------------------------------------------------------------------------
108# Get current position in file
109# Inputs: 0) reference to RandomAccess object
110# Returns: current position in file
111sub Tell($)
112{
113 my $self = shift;
114 my $rtnVal;
115 if ($self->{TESTED} < 0) {
116 $rtnVal = $self->{POS};
117 } else {
118 $rtnVal = tell($self->{FILE_PT});
119 }
120 return $rtnVal;
121}
122
123#------------------------------------------------------------------------------
124# Seek to position in file
125# Inputs: 0) reference to RandomAccess object
126# 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
127# Returns: 1 on success
128# Notes: When buffered, this doesn't quite behave like seek() since it will return
129# success even if you seek outside the limits of the file. However if you
130# do this, you will get an error on your next Read().
131sub Seek($$;$)
132{
133 my ($self, $num, $whence) = @_;
134 $whence = 0 unless defined $whence;
135 my $rtnVal;
136 if ($self->{TESTED} < 0) {
137 my $newPos;
138 if ($whence == 0) {
139 $newPos = $num; # from start of file
140 } elsif ($whence == 1) {
141 $newPos = $num + $self->{POS}; # relative to current position
142 } else {
143 $self->Slurp(); # read whole file into buffer
144 $newPos = $num + $self->{LEN}; # relative to end of file
145 }
146 if ($newPos >= 0) {
147 $self->{POS} = $newPos;
148 $rtnVal = 1;
149 }
150 } else {
151 $rtnVal = seek($self->{FILE_PT}, $num, $whence);
152 }
153 return $rtnVal;
154}
155
156#------------------------------------------------------------------------------
157# Read from the file
158# Inputs: 0) reference to RandomAccess object
159# 1) buffer, 2) bytes to read
160# Returns: Number of bytes read
161sub Read($$$)
162{
163 my $self = shift;
164 my $len = $_[1];
165 my $rtnVal;
166
167 # avoid dying with "Negative length" error
168 return 0 if $len & 0x80000000;
169
170 if ($self->{TESTED} < 0) {
171 my $buff;
172 my $newPos = $self->{POS} + $len;
173 # number of bytes to read from file
174 my $num = $newPos - $self->{LEN};
175 if ($num > 0 and $self->{FILE_PT}) {
176 # read data from file in multiples of $CHUNK_SIZE
177 $num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
178 $num = read($self->{FILE_PT}, $buff, $num);
179 if ($num) {
180 ${$self->{BUFF_PT}} .= $buff;
181 $self->{LEN} += $num;
182 }
183 }
184 # number of bytes left in data buffer
185 $num = $self->{LEN} - $self->{POS};
186 if ($len <= $num) {
187 $rtnVal = $len;
188 } elsif ($num <= 0) {
189 $_[0] = '';
190 return 0;
191 } else {
192 $rtnVal = $num;
193 }
194 # return data from our buffer
195 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
196 $self->{POS} += $rtnVal;
197 } else {
198 $_[0] = '' unless defined $_[0];
199 $rtnVal = read($self->{FILE_PT}, $_[0], $len) || 0;
200 }
201 if ($self->{DEBUG}) {
202 my $pos = $self->Tell() - $rtnVal;
203 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
204 $self->{DEBUG}->{$pos} = $rtnVal;
205 }
206 }
207 return $rtnVal;
208}
209
210#------------------------------------------------------------------------------
211# Read a line from file (end of line is $/)
212# Inputs: 0) reference to RandomAccess object, 1) buffer
213# Returns: Number of bytes read
214sub ReadLine($$)
215{
216 my $self = shift;
217 my $rtnVal;
218 my $fp = $self->{FILE_PT};
219
220 if ($self->{TESTED} < 0) {
221 my ($num, $buff);
222 my $pos = $self->{POS};
223 if ($fp) {
224 # make sure we have some data after the current position
225 while ($self->{LEN} <= $pos) {
226 $num = read($fp, $buff, $CHUNK_SIZE);
227 return 0 unless $num;
228 ${$self->{BUFF_PT}} .= $buff;
229 $self->{LEN} += $num;
230 }
231 # scan and read until we find the EOL (or hit EOF)
232 for (;;) {
233 $pos = index(${$self->{BUFF_PT}}, $/, $pos) + length($/);
234 last if $pos > 0;
235 $pos = $self->{LEN}; # have scanned to end of buffer
236 $num = read($fp, $buff, $CHUNK_SIZE) or last;
237 ${$self->{BUFF_PT}} .= $buff;
238 $self->{LEN} += $num;
239 }
240 } else {
241 # string i/o
242 $pos = index(${$self->{BUFF_PT}}, $/, $pos) + length($/);
243 $pos <= 0 and $pos = $self->{LEN};
244 }
245 # read the line from our buffer
246 $rtnVal = $pos - $self->{POS};
247 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
248 $self->{POS} = $pos;
249 } else {
250 $_[0] = <$fp>;
251 if (defined $_[0]) {
252 $rtnVal = length($_[0]);
253 } else {
254 $rtnVal = 0;
255 }
256 }
257 if ($self->{DEBUG}) {
258 my $pos = $self->Tell() - $rtnVal;
259 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
260 $self->{DEBUG}->{$pos} = $rtnVal;
261 }
262 }
263 return $rtnVal;
264}
265
266#------------------------------------------------------------------------------
267# Read whole file into buffer (without changing read pointer)
268# Inputs: 0) reference to RandomAccess object
269sub Slurp($)
270{
271 my $self = shift;
272 my $fp = $self->{FILE_PT} || return;
273 # read whole file into buffer (in large chunks)
274 my ($buff, $num);
275 while (($num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS)) != 0) {
276 ${$self->{BUFF_PT}} .= $buff;
277 $self->{LEN} += $num;
278 }
279}
280
281
282#------------------------------------------------------------------------------
283# set binary mode
284# Inputs: 0) reference to RandomAccess object
285sub BinMode($)
286{
287 my $self = shift;
288 binmode($self->{FILE_PT}) if $self->{FILE_PT};
289}
290
291#------------------------------------------------------------------------------
292# close the file and free the buffer
293# Inputs: 0) reference to RandomAccess object
294sub Close($)
295{
296 my $self = shift;
297
298 if ($self->{DEBUG}) {
299 local $_;
300 if ($self->Seek(0,2)) {
301 $self->{DEBUG}->{$self->Tell()} = 0; # set EOF marker
302 my $last;
303 my $tot = 0;
304 my $bad = 0;
305 foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
306 my $pos = $_;
307 my $len = $self->{DEBUG}->{$_};
308 if (defined $last and $last < $pos) {
309 my $bytes = $pos - $last;
310 $tot += $bytes;
311 $self->Seek($last);
312 my $buff;
313 $self->Read($buff, $bytes);
314 my $warn = '';
315 if ($buff =~ /[^\0]/) {
316 $bad += ($pos - $last);
317 $warn = ' - NON-ZERO!';
318 }
319 printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
320 }
321 my $cur = $pos + $len;
322 $last = $cur unless defined $last and $last > $cur;
323 }
324 print "$tot bytes missed";
325 $bad and print ", $bad non-zero!";
326 print "\n";
327 } else {
328 warn "File::RandomAccess DEBUG not working (file already closed?)\n";
329 }
330 delete $self->{DEBUG};
331 }
332 # close the file
333 if ($self->{FILE_PT}) {
334 close($self->{FILE_PT});
335 delete $self->{FILE_PT};
336 }
337 # reset the buffer
338 my $emptyBuff = '';
339 $self->{BUFF_PT} = \$emptyBuff;
340 $self->{LEN} = 0;
341 $self->{POS} = 0;
342}
343
344#------------------------------------------------------------------------------
3451; # end
Note: See TracBrowser for help on using the repository browser.