1 | #------------------------------------------------------------------------------
|
---|
2 | # File: RandomAccess.pm
|
---|
3 | #
|
---|
4 | # Description: Buffer to support random access reading of sequential file
|
---|
5 | #
|
---|
6 | # Revisions: 02/11/2004 - P. Harvey Created
|
---|
7 | # 02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
|
---|
8 | # 11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
|
---|
9 | # 01/02/2005 - P. Harvey Added DEBUG code
|
---|
10 | # 01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
|
---|
11 | # multi-character EOL sequences
|
---|
12 | # 02/20/2006 - P. Harvey Fixed bug where seek past end of file could
|
---|
13 | # generate "substr outside string" warning
|
---|
14 | # 06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
|
---|
15 | # 11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
|
---|
16 | # 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
|
---|
17 | # scalar with a multi-character newline
|
---|
18 | # 01/24/2009 - PH Protect against reading too much at once
|
---|
19 | # 10/04/2018 - PH Added NoBuffer option
|
---|
20 | #
|
---|
21 | # Notes: Calls the normal file i/o routines unless SeekTest() fails, in
|
---|
22 | # which case the file is buffered in memory to allow random access.
|
---|
23 | # SeekTest() is called automatically when the object is created
|
---|
24 | # unless specified.
|
---|
25 | #
|
---|
26 | # May also be used for string i/o (just pass a scalar reference)
|
---|
27 | #
|
---|
28 | # Legal: Copyright (c) 2003-2021 Phil Harvey (philharvey66 at gmail.com)
|
---|
29 | # This library is free software; you can redistribute it and/or
|
---|
30 | # modify it under the same terms as Perl itself.
|
---|
31 | #------------------------------------------------------------------------------
|
---|
32 |
|
---|
33 | package File::RandomAccess;
|
---|
34 |
|
---|
35 | use strict;
|
---|
36 | require 5.002;
|
---|
37 | require Exporter;
|
---|
38 |
|
---|
39 | use vars qw($VERSION @ISA @EXPORT_OK);
|
---|
40 | $VERSION = '1.11';
|
---|
41 | @ISA = qw(Exporter);
|
---|
42 |
|
---|
43 | sub Read($$$);
|
---|
44 |
|
---|
45 | # constants
|
---|
46 | my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
|
---|
47 | my $SKIP_SIZE = 65536; # size to skip when fast-forwarding over sequential data
|
---|
48 | my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
|
---|
49 |
|
---|
50 | #------------------------------------------------------------------------------
|
---|
51 | # Create new RandomAccess object
|
---|
52 | # Inputs: 0) reference to RandomAccess object or RandomAccess class name
|
---|
53 | # 1) file reference or scalar reference
|
---|
54 | # 2) flag set if file is already random access (disables automatic SeekTest)
|
---|
55 | sub new($$;$)
|
---|
56 | {
|
---|
57 | my ($that, $filePt, $isRandom) = @_;
|
---|
58 | my $class = ref($that) || $that;
|
---|
59 | my $self;
|
---|
60 |
|
---|
61 | if (ref $filePt eq 'SCALAR') {
|
---|
62 | # string i/o
|
---|
63 | $self = {
|
---|
64 | BUFF_PT => $filePt,
|
---|
65 | BASE => 0,
|
---|
66 | POS => 0,
|
---|
67 | LEN => length($$filePt),
|
---|
68 | TESTED => -1,
|
---|
69 | };
|
---|
70 | bless $self, $class;
|
---|
71 | } else {
|
---|
72 | # file i/o
|
---|
73 | my $buff = '';
|
---|
74 | $self = {
|
---|
75 | FILE_PT => $filePt, # file pointer
|
---|
76 | BUFF_PT => \$buff, # reference to file data
|
---|
77 | BASE => 0, # location of start of buffer in file
|
---|
78 | POS => 0, # current position in buffer
|
---|
79 | LEN => 0, # length of data in buffer
|
---|
80 | TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
|
---|
81 | };
|
---|
82 | bless $self, $class;
|
---|
83 | $self->SeekTest() unless $isRandom;
|
---|
84 | }
|
---|
85 | return $self;
|
---|
86 | }
|
---|
87 |
|
---|
88 | #------------------------------------------------------------------------------
|
---|
89 | # Enable DEBUG code
|
---|
90 | # Inputs: 0) reference to RandomAccess object
|
---|
91 | sub Debug($)
|
---|
92 | {
|
---|
93 | my $self = shift;
|
---|
94 | $self->{DEBUG} = { };
|
---|
95 | }
|
---|
96 |
|
---|
97 | #------------------------------------------------------------------------------
|
---|
98 | # Perform seek test and turn on buffering if necessary
|
---|
99 | # Inputs: 0) reference to RandomAccess object
|
---|
100 | # Returns: 1 if seek test passed (ie. no buffering required)
|
---|
101 | # Notes: Must be done before any other i/o
|
---|
102 | sub SeekTest($)
|
---|
103 | {
|
---|
104 | my $self = shift;
|
---|
105 | unless ($self->{TESTED}) {
|
---|
106 | my $fp = $self->{FILE_PT};
|
---|
107 | if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
|
---|
108 | $self->{TESTED} = 1; # test passed
|
---|
109 | } else {
|
---|
110 | $self->{TESTED} = -1; # test failed (requires buffering)
|
---|
111 | }
|
---|
112 | }
|
---|
113 | return $self->{TESTED} == 1 ? 1 : 0;
|
---|
114 | }
|
---|
115 |
|
---|
116 | #------------------------------------------------------------------------------
|
---|
117 | # Get current position in file
|
---|
118 | # Inputs: 0) reference to RandomAccess object
|
---|
119 | # Returns: current position in file
|
---|
120 | sub Tell($)
|
---|
121 | {
|
---|
122 | my $self = shift;
|
---|
123 | my $rtnVal;
|
---|
124 | if ($self->{TESTED} < 0) {
|
---|
125 | $rtnVal = $self->{POS} + $self->{BASE};
|
---|
126 | } else {
|
---|
127 | $rtnVal = tell($self->{FILE_PT});
|
---|
128 | }
|
---|
129 | return $rtnVal;
|
---|
130 | }
|
---|
131 |
|
---|
132 | #------------------------------------------------------------------------------
|
---|
133 | # Seek to position in file
|
---|
134 | # Inputs: 0) reference to RandomAccess object
|
---|
135 | # 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
|
---|
136 | # Returns: 1 on success
|
---|
137 | # Notes: When buffered, this doesn't quite behave like seek() since it will return
|
---|
138 | # success even if you seek outside the limits of the file. However if you
|
---|
139 | # do this, you will get an error on your next Read().
|
---|
140 | sub Seek($$;$)
|
---|
141 | {
|
---|
142 | my ($self, $num, $whence) = @_;
|
---|
143 | $whence = 0 unless defined $whence;
|
---|
144 | my $rtnVal;
|
---|
145 | if ($self->{TESTED} < 0) {
|
---|
146 | my $newPos;
|
---|
147 | if ($whence == 0) {
|
---|
148 | $newPos = $num - $self->{BASE}; # from start of file
|
---|
149 | } elsif ($whence == 1) {
|
---|
150 | $newPos = $num + $self->{POS}; # relative to current position
|
---|
151 | } elsif ($self->{NoBuffer} and $self->{FILE_PT}) {
|
---|
152 | $newPos = -1; # (can't seek relative to end if no buffering)
|
---|
153 | } else {
|
---|
154 | $self->Slurp(); # read whole file into buffer
|
---|
155 | $newPos = $num + $self->{LEN}; # relative to end of file
|
---|
156 | }
|
---|
157 | if ($newPos >= 0) {
|
---|
158 | $self->{POS} = $newPos;
|
---|
159 | $rtnVal = 1;
|
---|
160 | }
|
---|
161 | } else {
|
---|
162 | $rtnVal = seek($self->{FILE_PT}, $num, $whence);
|
---|
163 | }
|
---|
164 | return $rtnVal;
|
---|
165 | }
|
---|
166 |
|
---|
167 | #------------------------------------------------------------------------------
|
---|
168 | # Read from the file
|
---|
169 | # Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
|
---|
170 | # Returns: Number of bytes read
|
---|
171 | sub Read($$$)
|
---|
172 | {
|
---|
173 | my $self = shift;
|
---|
174 | my $len = $_[1];
|
---|
175 | my $rtnVal;
|
---|
176 |
|
---|
177 | # protect against reading too much at once
|
---|
178 | # (also from dying with a "Negative length" error)
|
---|
179 | if ($len & 0xf8000000) {
|
---|
180 | return 0 if $len < 0;
|
---|
181 | # read in smaller blocks because Windows attempts to pre-allocate
|
---|
182 | # memory for the full size, which can lead to an out-of-memory error
|
---|
183 | my $maxLen = 0x4000000; # (MUST be less than bitmask in "if" above)
|
---|
184 | my $num = Read($self, $_[0], $maxLen);
|
---|
185 | return $num if $num < $maxLen;
|
---|
186 | for (;;) {
|
---|
187 | $len -= $maxLen;
|
---|
188 | last if $len <= 0;
|
---|
189 | my $l = $len < $maxLen ? $len : $maxLen;
|
---|
190 | my $buff;
|
---|
191 | my $n = Read($self, $buff, $l);
|
---|
192 | last unless $n;
|
---|
193 | $_[0] .= $buff;
|
---|
194 | $num += $n;
|
---|
195 | last if $n < $l;
|
---|
196 | }
|
---|
197 | return $num;
|
---|
198 | }
|
---|
199 | # read through our buffer if necessary
|
---|
200 | if ($self->{TESTED} < 0) {
|
---|
201 | # purge old data before reading in NoBuffer mode
|
---|
202 | $self->Purge() or return 0 if $self->{NoBuffer};
|
---|
203 | my $buff;
|
---|
204 | my $newPos = $self->{POS} + $len;
|
---|
205 | # number of bytes to read from file
|
---|
206 | my $num = $newPos - $self->{LEN};
|
---|
207 | if ($num > 0 and $self->{FILE_PT}) {
|
---|
208 | # read data from file in multiples of $CHUNK_SIZE
|
---|
209 | $num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
|
---|
210 | $num = read($self->{FILE_PT}, $buff, $num);
|
---|
211 | if ($num) {
|
---|
212 | ${$self->{BUFF_PT}} .= $buff;
|
---|
213 | $self->{LEN} += $num;
|
---|
214 | }
|
---|
215 | }
|
---|
216 | # number of bytes left in data buffer
|
---|
217 | $num = $self->{LEN} - $self->{POS};
|
---|
218 | if ($len <= $num) {
|
---|
219 | $rtnVal = $len;
|
---|
220 | } elsif ($num <= 0) {
|
---|
221 | $_[0] = '';
|
---|
222 | return 0;
|
---|
223 | } else {
|
---|
224 | $rtnVal = $num;
|
---|
225 | }
|
---|
226 | # return data from our buffer
|
---|
227 | $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
|
---|
228 | $self->{POS} += $rtnVal;
|
---|
229 | } else {
|
---|
230 | # read directly from file
|
---|
231 | $_[0] = '' unless defined $_[0];
|
---|
232 | $rtnVal = read($self->{FILE_PT}, $_[0], $len) || 0;
|
---|
233 | }
|
---|
234 | if ($self->{DEBUG}) {
|
---|
235 | my $pos = $self->Tell() - $rtnVal;
|
---|
236 | unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
|
---|
237 | $self->{DEBUG}->{$pos} = $rtnVal;
|
---|
238 | }
|
---|
239 | }
|
---|
240 | return $rtnVal;
|
---|
241 | }
|
---|
242 |
|
---|
243 | #------------------------------------------------------------------------------
|
---|
244 | # Read a line from file (end of line is $/)
|
---|
245 | # Inputs: 0) reference to RandomAccess object, 1) buffer
|
---|
246 | # Returns: Number of bytes read
|
---|
247 | sub ReadLine($$)
|
---|
248 | {
|
---|
249 | my $self = shift;
|
---|
250 | my $rtnVal;
|
---|
251 | my $fp = $self->{FILE_PT};
|
---|
252 |
|
---|
253 | if ($self->{TESTED} < 0) {
|
---|
254 | my ($num, $buff);
|
---|
255 | $self->Purge() or return 0 if $self->{NoBuffer};
|
---|
256 | my $pos = $self->{POS};
|
---|
257 | if ($fp) {
|
---|
258 | # make sure we have some data after the current position
|
---|
259 | while ($self->{LEN} <= $pos) {
|
---|
260 | $num = read($fp, $buff, $CHUNK_SIZE);
|
---|
261 | return 0 unless $num;
|
---|
262 | ${$self->{BUFF_PT}} .= $buff;
|
---|
263 | $self->{LEN} += $num;
|
---|
264 | }
|
---|
265 | # scan and read until we find the EOL (or hit EOF)
|
---|
266 | for (;;) {
|
---|
267 | $pos = index(${$self->{BUFF_PT}}, $/, $pos);
|
---|
268 | if ($pos >= 0) {
|
---|
269 | $pos += length($/);
|
---|
270 | last;
|
---|
271 | }
|
---|
272 | $pos = $self->{LEN}; # have scanned to end of buffer
|
---|
273 | $num = read($fp, $buff, $CHUNK_SIZE) or last;
|
---|
274 | ${$self->{BUFF_PT}} .= $buff;
|
---|
275 | $self->{LEN} += $num;
|
---|
276 | }
|
---|
277 | } else {
|
---|
278 | # string i/o
|
---|
279 | $pos = index(${$self->{BUFF_PT}}, $/, $pos);
|
---|
280 | if ($pos < 0) {
|
---|
281 | $pos = $self->{LEN};
|
---|
282 | $self->{POS} = $pos if $self->{POS} > $pos;
|
---|
283 | } else {
|
---|
284 | $pos += length($/);
|
---|
285 | }
|
---|
286 | }
|
---|
287 | # read the line from our buffer
|
---|
288 | $rtnVal = $pos - $self->{POS};
|
---|
289 | $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
|
---|
290 | $self->{POS} = $pos;
|
---|
291 | } else {
|
---|
292 | $_[0] = <$fp>;
|
---|
293 | if (defined $_[0]) {
|
---|
294 | $rtnVal = length($_[0]);
|
---|
295 | } else {
|
---|
296 | $rtnVal = 0;
|
---|
297 | }
|
---|
298 | }
|
---|
299 | if ($self->{DEBUG}) {
|
---|
300 | my $pos = $self->Tell() - $rtnVal;
|
---|
301 | unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
|
---|
302 | $self->{DEBUG}->{$pos} = $rtnVal;
|
---|
303 | }
|
---|
304 | }
|
---|
305 | return $rtnVal;
|
---|
306 | }
|
---|
307 |
|
---|
308 | #------------------------------------------------------------------------------
|
---|
309 | # Read whole file into buffer (without changing read pointer)
|
---|
310 | # Inputs: 0) reference to RandomAccess object
|
---|
311 | sub Slurp($)
|
---|
312 | {
|
---|
313 | my $self = shift;
|
---|
314 | my $fp = $self->{FILE_PT} || return;
|
---|
315 | # read whole file into buffer (in large chunks)
|
---|
316 | my ($buff, $num);
|
---|
317 | while (($num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS)) != 0) {
|
---|
318 | ${$self->{BUFF_PT}} .= $buff;
|
---|
319 | $self->{LEN} += $num;
|
---|
320 | }
|
---|
321 | }
|
---|
322 |
|
---|
323 | #------------------------------------------------------------------------------
|
---|
324 | # Purge internal buffer [internal use only]
|
---|
325 | # Inputs: 0) reference to RandomAccess object
|
---|
326 | # Returns: 1 on success, or 0 if current buffer position is negative
|
---|
327 | # Notes: This is called only in NoBuffer mode
|
---|
328 | sub Purge($)
|
---|
329 | {
|
---|
330 | my $self = shift;
|
---|
331 | return 1 unless $self->{FILE_PT};
|
---|
332 | return 0 if $self->{POS} < 0; # error if we can't read from here
|
---|
333 | if ($self->{POS} > $CHUNK_SIZE) {
|
---|
334 | my $purge = $self->{POS} - ($self->{POS} % $CHUNK_SIZE);
|
---|
335 | if ($purge >= $self->{LEN}) {
|
---|
336 | # read up to current position in 64k chunks, discarding as we go
|
---|
337 | while ($self->{POS} > $self->{LEN}) {
|
---|
338 | $self->{BASE} += $self->{LEN};
|
---|
339 | $self->{POS} -= $self->{LEN};
|
---|
340 | ${$self->{BUFF_PT}} = '';
|
---|
341 | $self->{LEN} = read($self->{FILE_PT}, ${$self->{BUFF_PT}}, $SKIP_SIZE);
|
---|
342 | last if $self->{LEN} < $SKIP_SIZE;
|
---|
343 | }
|
---|
344 | } elsif ($purge > 0) {
|
---|
345 | ${$self->{BUFF_PT}} = substr ${$self->{BUFF_PT}}, $purge;
|
---|
346 | $self->{BASE} += $purge;
|
---|
347 | $self->{POS} -= $purge;
|
---|
348 | $self->{LEN} -= $purge;
|
---|
349 | }
|
---|
350 | }
|
---|
351 | return 1;
|
---|
352 | }
|
---|
353 |
|
---|
354 | #------------------------------------------------------------------------------
|
---|
355 | # Set binary mode
|
---|
356 | # Inputs: 0) reference to RandomAccess object
|
---|
357 | sub BinMode($)
|
---|
358 | {
|
---|
359 | my $self = shift;
|
---|
360 | binmode($self->{FILE_PT}) if $self->{FILE_PT};
|
---|
361 | }
|
---|
362 |
|
---|
363 | #------------------------------------------------------------------------------
|
---|
364 | # Close the file and free the buffer
|
---|
365 | # Inputs: 0) reference to RandomAccess object
|
---|
366 | sub Close($)
|
---|
367 | {
|
---|
368 | my $self = shift;
|
---|
369 |
|
---|
370 | if ($self->{DEBUG}) {
|
---|
371 | local $_;
|
---|
372 | if ($self->Seek(0,2)) {
|
---|
373 | $self->{DEBUG}->{$self->Tell()} = 0; # set EOF marker
|
---|
374 | my $last;
|
---|
375 | my $tot = 0;
|
---|
376 | my $bad = 0;
|
---|
377 | foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
|
---|
378 | my $pos = $_;
|
---|
379 | my $len = $self->{DEBUG}->{$_};
|
---|
380 | if (defined $last and $last < $pos) {
|
---|
381 | my $bytes = $pos - $last;
|
---|
382 | $tot += $bytes;
|
---|
383 | $self->Seek($last);
|
---|
384 | my $buff;
|
---|
385 | $self->Read($buff, $bytes);
|
---|
386 | my $warn = '';
|
---|
387 | if ($buff =~ /[^\0]/) {
|
---|
388 | $bad += ($pos - $last);
|
---|
389 | $warn = ' - NON-ZERO!';
|
---|
390 | }
|
---|
391 | printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
|
---|
392 | }
|
---|
393 | my $cur = $pos + $len;
|
---|
394 | $last = $cur unless defined $last and $last > $cur;
|
---|
395 | }
|
---|
396 | print "$tot bytes missed";
|
---|
397 | $bad and print ", $bad non-zero!";
|
---|
398 | print "\n";
|
---|
399 | } else {
|
---|
400 | warn "File::RandomAccess DEBUG not working (file already closed?)\n";
|
---|
401 | }
|
---|
402 | delete $self->{DEBUG};
|
---|
403 | }
|
---|
404 | # close the file
|
---|
405 | if ($self->{FILE_PT}) {
|
---|
406 | close($self->{FILE_PT});
|
---|
407 | delete $self->{FILE_PT};
|
---|
408 | }
|
---|
409 | # reset the buffer
|
---|
410 | my $emptyBuff = '';
|
---|
411 | $self->{BUFF_PT} = \$emptyBuff;
|
---|
412 | $self->{BASE} = 0;
|
---|
413 | $self->{LEN} = 0;
|
---|
414 | $self->{POS} = 0;
|
---|
415 | }
|
---|
416 |
|
---|
417 | #------------------------------------------------------------------------------
|
---|
418 | 1; # end
|
---|