1 | #------------------------------------------------------------------------------
|
---|
2 | # File: RandomAccess.pm
|
---|
3 | #
|
---|
4 | # Description: Buffer to support random access reading of sequential file
|
---|
5 | #
|
---|
6 | # Revisions: 02/11/2004 - P. Harvey Created
|
---|
7 | # 02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
|
---|
8 | # 11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
|
---|
9 | # 01/02/2005 - P. Harvey Added DEBUG code
|
---|
10 | # 01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
|
---|
11 | # multi-character EOL sequences
|
---|
12 | # 02/20/2006 - P. Harvey Fixed bug where seek past end of file could
|
---|
13 | # generate "substr outside string" warning
|
---|
14 | # 06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
|
---|
15 | # 11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
|
---|
16 | # 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
|
---|
17 | # scalar with a multi-character newline
|
---|
18 | # 01/24/2009 - PH Protect against reading too much at once
|
---|
19 | #
|
---|
20 | # Notes: Calls the normal file i/o routines unless SeekTest() fails, in
|
---|
21 | # which case the file is buffered in memory to allow random access.
|
---|
22 | # SeekTest() is called automatically when the object is created
|
---|
23 | # unless specified.
|
---|
24 | #
|
---|
25 | # May also be used for string i/o (just pass a scalar reference)
|
---|
26 | #
|
---|
27 | # Legal: Copyright (c) 2003-2010 Phil Harvey (phil at owl.phy.queensu.ca)
|
---|
28 | # This library is free software; you can redistribute it and/or
|
---|
29 | # modify it under the same terms as Perl itself.
|
---|
30 | #------------------------------------------------------------------------------
|
---|
31 |
|
---|
32 | package File::RandomAccess;
|
---|
33 |
|
---|
34 | use strict;
|
---|
35 | require 5.002;
|
---|
36 | require Exporter;
|
---|
37 |
|
---|
38 | use vars qw($VERSION @ISA @EXPORT_OK);
|
---|
39 | $VERSION = '1.10';
|
---|
40 | @ISA = qw(Exporter);
|
---|
41 |
|
---|
42 | sub Read($$$);
|
---|
43 |
|
---|
44 | # constants
|
---|
45 | my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
|
---|
46 | my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
|
---|
47 |
|
---|
48 | #------------------------------------------------------------------------------
|
---|
49 | # Create new RandomAccess object
|
---|
50 | # Inputs: 0) reference to RandomAccess object or RandomAccess class name
|
---|
51 | # 1) file reference or scalar reference
|
---|
52 | # 2) flag set if file is already random access (disables automatic SeekTest)
|
---|
53 | sub new($$;$)
|
---|
54 | {
|
---|
55 | my ($that, $filePt, $isRandom) = @_;
|
---|
56 | my $class = ref($that) || $that;
|
---|
57 | my $self;
|
---|
58 |
|
---|
59 | if (ref $filePt eq 'SCALAR') {
|
---|
60 | # string i/o
|
---|
61 | $self = {
|
---|
62 | BUFF_PT => $filePt,
|
---|
63 | POS => 0,
|
---|
64 | LEN => length($$filePt),
|
---|
65 | TESTED => -1,
|
---|
66 | };
|
---|
67 | bless $self, $class;
|
---|
68 | } else {
|
---|
69 | # file i/o
|
---|
70 | my $buff = '';
|
---|
71 | $self = {
|
---|
72 | FILE_PT => $filePt, # file pointer
|
---|
73 | BUFF_PT => \$buff, # reference to file data
|
---|
74 | POS => 0, # current position in file
|
---|
75 | LEN => 0, # data length
|
---|
76 | TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
|
---|
77 | };
|
---|
78 | bless $self, $class;
|
---|
79 | $self->SeekTest() unless $isRandom;
|
---|
80 | }
|
---|
81 | return $self;
|
---|
82 | }
|
---|
83 |
|
---|
84 | #------------------------------------------------------------------------------
|
---|
85 | # Enable DEBUG code
|
---|
86 | # Inputs: 0) reference to RandomAccess object
|
---|
87 | sub Debug($)
|
---|
88 | {
|
---|
89 | my $self = shift;
|
---|
90 | $self->{DEBUG} = { };
|
---|
91 | }
|
---|
92 |
|
---|
93 | #------------------------------------------------------------------------------
|
---|
94 | # Perform seek test and turn on buffering if necessary
|
---|
95 | # Inputs: 0) reference to RandomAccess object
|
---|
96 | # Returns: 1 if seek test passed (ie. no buffering required)
|
---|
97 | # Notes: Must be done before any other i/o
|
---|
98 | sub SeekTest($)
|
---|
99 | {
|
---|
100 | my $self = shift;
|
---|
101 | unless ($self->{TESTED}) {
|
---|
102 | my $fp = $self->{FILE_PT};
|
---|
103 | if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
|
---|
104 | $self->{TESTED} = 1; # test passed
|
---|
105 | } else {
|
---|
106 | $self->{TESTED} = -1; # test failed (requires buffering)
|
---|
107 | }
|
---|
108 | }
|
---|
109 | return $self->{TESTED} == 1 ? 1 : 0;
|
---|
110 | }
|
---|
111 |
|
---|
112 | #------------------------------------------------------------------------------
|
---|
113 | # Get current position in file
|
---|
114 | # Inputs: 0) reference to RandomAccess object
|
---|
115 | # Returns: current position in file
|
---|
116 | sub Tell($)
|
---|
117 | {
|
---|
118 | my $self = shift;
|
---|
119 | my $rtnVal;
|
---|
120 | if ($self->{TESTED} < 0) {
|
---|
121 | $rtnVal = $self->{POS};
|
---|
122 | } else {
|
---|
123 | $rtnVal = tell($self->{FILE_PT});
|
---|
124 | }
|
---|
125 | return $rtnVal;
|
---|
126 | }
|
---|
127 |
|
---|
128 | #------------------------------------------------------------------------------
|
---|
129 | # Seek to position in file
|
---|
130 | # Inputs: 0) reference to RandomAccess object
|
---|
131 | # 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
|
---|
132 | # Returns: 1 on success
|
---|
133 | # Notes: When buffered, this doesn't quite behave like seek() since it will return
|
---|
134 | # success even if you seek outside the limits of the file. However if you
|
---|
135 | # do this, you will get an error on your next Read().
|
---|
136 | sub Seek($$;$)
|
---|
137 | {
|
---|
138 | my ($self, $num, $whence) = @_;
|
---|
139 | $whence = 0 unless defined $whence;
|
---|
140 | my $rtnVal;
|
---|
141 | if ($self->{TESTED} < 0) {
|
---|
142 | my $newPos;
|
---|
143 | if ($whence == 0) {
|
---|
144 | $newPos = $num; # from start of file
|
---|
145 | } elsif ($whence == 1) {
|
---|
146 | $newPos = $num + $self->{POS}; # relative to current position
|
---|
147 | } else {
|
---|
148 | $self->Slurp(); # read whole file into buffer
|
---|
149 | $newPos = $num + $self->{LEN}; # relative to end of file
|
---|
150 | }
|
---|
151 | if ($newPos >= 0) {
|
---|
152 | $self->{POS} = $newPos;
|
---|
153 | $rtnVal = 1;
|
---|
154 | }
|
---|
155 | } else {
|
---|
156 | $rtnVal = seek($self->{FILE_PT}, $num, $whence);
|
---|
157 | }
|
---|
158 | return $rtnVal;
|
---|
159 | }
|
---|
160 |
|
---|
161 | #------------------------------------------------------------------------------
|
---|
162 | # Read from the file
|
---|
163 | # Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
|
---|
164 | # Returns: Number of bytes read
|
---|
165 | sub Read($$$)
|
---|
166 | {
|
---|
167 | my $self = shift;
|
---|
168 | my $len = $_[1];
|
---|
169 | my $rtnVal;
|
---|
170 |
|
---|
171 | # protect against reading too much at once
|
---|
172 | # (also from dying with a "Negative length" error)
|
---|
173 | if ($len & 0xf8000000) {
|
---|
174 | return 0 if $len < 0;
|
---|
175 | # read in smaller blocks because Windows attempts to pre-allocate
|
---|
176 | # memory for the full size, which can lead to an out-of-memory error
|
---|
177 | my $maxLen = 0x4000000; # (MUST be less than bitmask in "if" above)
|
---|
178 | my $num = Read($self, $_[0], $maxLen);
|
---|
179 | return $num if $num < $maxLen;
|
---|
180 | for (;;) {
|
---|
181 | $len -= $maxLen;
|
---|
182 | last if $len <= 0;
|
---|
183 | my $l = $len < $maxLen ? $len : $maxLen;
|
---|
184 | my $buff;
|
---|
185 | my $n = Read($self, $buff, $l);
|
---|
186 | last unless $n;
|
---|
187 | $_[0] .= $buff;
|
---|
188 | $num += $n;
|
---|
189 | last if $n < $l;
|
---|
190 | }
|
---|
191 | return $num;
|
---|
192 | }
|
---|
193 | # read through our buffer if necessary
|
---|
194 | if ($self->{TESTED} < 0) {
|
---|
195 | my $buff;
|
---|
196 | my $newPos = $self->{POS} + $len;
|
---|
197 | # number of bytes to read from file
|
---|
198 | my $num = $newPos - $self->{LEN};
|
---|
199 | if ($num > 0 and $self->{FILE_PT}) {
|
---|
200 | # read data from file in multiples of $CHUNK_SIZE
|
---|
201 | $num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
|
---|
202 | $num = read($self->{FILE_PT}, $buff, $num);
|
---|
203 | if ($num) {
|
---|
204 | ${$self->{BUFF_PT}} .= $buff;
|
---|
205 | $self->{LEN} += $num;
|
---|
206 | }
|
---|
207 | }
|
---|
208 | # number of bytes left in data buffer
|
---|
209 | $num = $self->{LEN} - $self->{POS};
|
---|
210 | if ($len <= $num) {
|
---|
211 | $rtnVal = $len;
|
---|
212 | } elsif ($num <= 0) {
|
---|
213 | $_[0] = '';
|
---|
214 | return 0;
|
---|
215 | } else {
|
---|
216 | $rtnVal = $num;
|
---|
217 | }
|
---|
218 | # return data from our buffer
|
---|
219 | $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
|
---|
220 | $self->{POS} += $rtnVal;
|
---|
221 | } else {
|
---|
222 | # read directly from file
|
---|
223 | $_[0] = '' unless defined $_[0];
|
---|
224 | $rtnVal = read($self->{FILE_PT}, $_[0], $len) || 0;
|
---|
225 | }
|
---|
226 | if ($self->{DEBUG}) {
|
---|
227 | my $pos = $self->Tell() - $rtnVal;
|
---|
228 | unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
|
---|
229 | $self->{DEBUG}->{$pos} = $rtnVal;
|
---|
230 | }
|
---|
231 | }
|
---|
232 | return $rtnVal;
|
---|
233 | }
|
---|
234 |
|
---|
235 | #------------------------------------------------------------------------------
|
---|
236 | # Read a line from file (end of line is $/)
|
---|
237 | # Inputs: 0) reference to RandomAccess object, 1) buffer
|
---|
238 | # Returns: Number of bytes read
|
---|
239 | sub ReadLine($$)
|
---|
240 | {
|
---|
241 | my $self = shift;
|
---|
242 | my $rtnVal;
|
---|
243 | my $fp = $self->{FILE_PT};
|
---|
244 |
|
---|
245 | if ($self->{TESTED} < 0) {
|
---|
246 | my ($num, $buff);
|
---|
247 | my $pos = $self->{POS};
|
---|
248 | if ($fp) {
|
---|
249 | # make sure we have some data after the current position
|
---|
250 | while ($self->{LEN} <= $pos) {
|
---|
251 | $num = read($fp, $buff, $CHUNK_SIZE);
|
---|
252 | return 0 unless $num;
|
---|
253 | ${$self->{BUFF_PT}} .= $buff;
|
---|
254 | $self->{LEN} += $num;
|
---|
255 | }
|
---|
256 | # scan and read until we find the EOL (or hit EOF)
|
---|
257 | for (;;) {
|
---|
258 | $pos = index(${$self->{BUFF_PT}}, $/, $pos);
|
---|
259 | if ($pos >= 0) {
|
---|
260 | $pos += length($/);
|
---|
261 | last;
|
---|
262 | }
|
---|
263 | $pos = $self->{LEN}; # have scanned to end of buffer
|
---|
264 | $num = read($fp, $buff, $CHUNK_SIZE) or last;
|
---|
265 | ${$self->{BUFF_PT}} .= $buff;
|
---|
266 | $self->{LEN} += $num;
|
---|
267 | }
|
---|
268 | } else {
|
---|
269 | # string i/o
|
---|
270 | $pos = index(${$self->{BUFF_PT}}, $/, $pos);
|
---|
271 | if ($pos < 0) {
|
---|
272 | $pos = $self->{LEN};
|
---|
273 | $self->{POS} = $pos if $self->{POS} > $pos;
|
---|
274 | } else {
|
---|
275 | $pos += length($/);
|
---|
276 | }
|
---|
277 | }
|
---|
278 | # read the line from our buffer
|
---|
279 | $rtnVal = $pos - $self->{POS};
|
---|
280 | $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
|
---|
281 | $self->{POS} = $pos;
|
---|
282 | } else {
|
---|
283 | $_[0] = <$fp>;
|
---|
284 | if (defined $_[0]) {
|
---|
285 | $rtnVal = length($_[0]);
|
---|
286 | } else {
|
---|
287 | $rtnVal = 0;
|
---|
288 | }
|
---|
289 | }
|
---|
290 | if ($self->{DEBUG}) {
|
---|
291 | my $pos = $self->Tell() - $rtnVal;
|
---|
292 | unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
|
---|
293 | $self->{DEBUG}->{$pos} = $rtnVal;
|
---|
294 | }
|
---|
295 | }
|
---|
296 | return $rtnVal;
|
---|
297 | }
|
---|
298 |
|
---|
299 | #------------------------------------------------------------------------------
|
---|
300 | # Read whole file into buffer (without changing read pointer)
|
---|
301 | # Inputs: 0) reference to RandomAccess object
|
---|
302 | sub Slurp($)
|
---|
303 | {
|
---|
304 | my $self = shift;
|
---|
305 | my $fp = $self->{FILE_PT} || return;
|
---|
306 | # read whole file into buffer (in large chunks)
|
---|
307 | my ($buff, $num);
|
---|
308 | while (($num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS)) != 0) {
|
---|
309 | ${$self->{BUFF_PT}} .= $buff;
|
---|
310 | $self->{LEN} += $num;
|
---|
311 | }
|
---|
312 | }
|
---|
313 |
|
---|
314 |
|
---|
315 | #------------------------------------------------------------------------------
|
---|
316 | # set binary mode
|
---|
317 | # Inputs: 0) reference to RandomAccess object
|
---|
318 | sub BinMode($)
|
---|
319 | {
|
---|
320 | my $self = shift;
|
---|
321 | binmode($self->{FILE_PT}) if $self->{FILE_PT};
|
---|
322 | }
|
---|
323 |
|
---|
324 | #------------------------------------------------------------------------------
|
---|
325 | # close the file and free the buffer
|
---|
326 | # Inputs: 0) reference to RandomAccess object
|
---|
327 | sub Close($)
|
---|
328 | {
|
---|
329 | my $self = shift;
|
---|
330 |
|
---|
331 | if ($self->{DEBUG}) {
|
---|
332 | local $_;
|
---|
333 | if ($self->Seek(0,2)) {
|
---|
334 | $self->{DEBUG}->{$self->Tell()} = 0; # set EOF marker
|
---|
335 | my $last;
|
---|
336 | my $tot = 0;
|
---|
337 | my $bad = 0;
|
---|
338 | foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
|
---|
339 | my $pos = $_;
|
---|
340 | my $len = $self->{DEBUG}->{$_};
|
---|
341 | if (defined $last and $last < $pos) {
|
---|
342 | my $bytes = $pos - $last;
|
---|
343 | $tot += $bytes;
|
---|
344 | $self->Seek($last);
|
---|
345 | my $buff;
|
---|
346 | $self->Read($buff, $bytes);
|
---|
347 | my $warn = '';
|
---|
348 | if ($buff =~ /[^\0]/) {
|
---|
349 | $bad += ($pos - $last);
|
---|
350 | $warn = ' - NON-ZERO!';
|
---|
351 | }
|
---|
352 | printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
|
---|
353 | }
|
---|
354 | my $cur = $pos + $len;
|
---|
355 | $last = $cur unless defined $last and $last > $cur;
|
---|
356 | }
|
---|
357 | print "$tot bytes missed";
|
---|
358 | $bad and print ", $bad non-zero!";
|
---|
359 | print "\n";
|
---|
360 | } else {
|
---|
361 | warn "File::RandomAccess DEBUG not working (file already closed?)\n";
|
---|
362 | }
|
---|
363 | delete $self->{DEBUG};
|
---|
364 | }
|
---|
365 | # close the file
|
---|
366 | if ($self->{FILE_PT}) {
|
---|
367 | close($self->{FILE_PT});
|
---|
368 | delete $self->{FILE_PT};
|
---|
369 | }
|
---|
370 | # reset the buffer
|
---|
371 | my $emptyBuff = '';
|
---|
372 | $self->{BUFF_PT} = \$emptyBuff;
|
---|
373 | $self->{LEN} = 0;
|
---|
374 | $self->{POS} = 0;
|
---|
375 | }
|
---|
376 |
|
---|
377 | #------------------------------------------------------------------------------
|
---|
378 | 1; # end
|
---|