source: gs2-extensions/parallel-building/trunk/src/perllib/cpan/File/RandomAccess.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

File size: 12.5 KB
Line 
1#------------------------------------------------------------------------------
2# File: RandomAccess.pm
3#
4# Description: Buffer to support random access reading of sequential file
5#
6# Revisions: 02/11/2004 - P. Harvey Created
7# 02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
8# 11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
9# 01/02/2005 - P. Harvey Added DEBUG code
10# 01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
11# multi-character EOL sequences
12# 02/20/2006 - P. Harvey Fixed bug where seek past end of file could
13# generate "substr outside string" warning
14# 06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
15# 11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
16# 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
17# scalar with a multi-character newline
18# 01/24/2009 - PH Protect against reading too much at once
19#
20# Notes: Calls the normal file i/o routines unless SeekTest() fails, in
21# which case the file is buffered in memory to allow random access.
22# SeekTest() is called automatically when the object is created
23# unless specified.
24#
25# May also be used for string i/o (just pass a scalar reference)
26#
27# Legal: Copyright (c) 2003-2010 Phil Harvey (phil at owl.phy.queensu.ca)
28# This library is free software; you can redistribute it and/or
29# modify it under the same terms as Perl itself.
30#------------------------------------------------------------------------------
31
32package File::RandomAccess;
33
34use strict;
35require 5.002;
36require Exporter;
37
38use vars qw($VERSION @ISA @EXPORT_OK);
39$VERSION = '1.10';
40@ISA = qw(Exporter);
41
42sub Read($$$);
43
44# constants
45my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
46my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
47
48#------------------------------------------------------------------------------
49# Create new RandomAccess object
50# Inputs: 0) reference to RandomAccess object or RandomAccess class name
51# 1) file reference or scalar reference
52# 2) flag set if file is already random access (disables automatic SeekTest)
53sub new($$;$)
54{
55 my ($that, $filePt, $isRandom) = @_;
56 my $class = ref($that) || $that;
57 my $self;
58
59 if (ref $filePt eq 'SCALAR') {
60 # string i/o
61 $self = {
62 BUFF_PT => $filePt,
63 POS => 0,
64 LEN => length($$filePt),
65 TESTED => -1,
66 };
67 bless $self, $class;
68 } else {
69 # file i/o
70 my $buff = '';
71 $self = {
72 FILE_PT => $filePt, # file pointer
73 BUFF_PT => \$buff, # reference to file data
74 POS => 0, # current position in file
75 LEN => 0, # data length
76 TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
77 };
78 bless $self, $class;
79 $self->SeekTest() unless $isRandom;
80 }
81 return $self;
82}
83
84#------------------------------------------------------------------------------
85# Enable DEBUG code
86# Inputs: 0) reference to RandomAccess object
87sub Debug($)
88{
89 my $self = shift;
90 $self->{DEBUG} = { };
91}
92
93#------------------------------------------------------------------------------
94# Perform seek test and turn on buffering if necessary
95# Inputs: 0) reference to RandomAccess object
96# Returns: 1 if seek test passed (ie. no buffering required)
97# Notes: Must be done before any other i/o
98sub SeekTest($)
99{
100 my $self = shift;
101 unless ($self->{TESTED}) {
102 my $fp = $self->{FILE_PT};
103 if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
104 $self->{TESTED} = 1; # test passed
105 } else {
106 $self->{TESTED} = -1; # test failed (requires buffering)
107 }
108 }
109 return $self->{TESTED} == 1 ? 1 : 0;
110}
111
112#------------------------------------------------------------------------------
113# Get current position in file
114# Inputs: 0) reference to RandomAccess object
115# Returns: current position in file
116sub Tell($)
117{
118 my $self = shift;
119 my $rtnVal;
120 if ($self->{TESTED} < 0) {
121 $rtnVal = $self->{POS};
122 } else {
123 $rtnVal = tell($self->{FILE_PT});
124 }
125 return $rtnVal;
126}
127
128#------------------------------------------------------------------------------
129# Seek to position in file
130# Inputs: 0) reference to RandomAccess object
131# 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
132# Returns: 1 on success
133# Notes: When buffered, this doesn't quite behave like seek() since it will return
134# success even if you seek outside the limits of the file. However if you
135# do this, you will get an error on your next Read().
136sub Seek($$;$)
137{
138 my ($self, $num, $whence) = @_;
139 $whence = 0 unless defined $whence;
140 my $rtnVal;
141 if ($self->{TESTED} < 0) {
142 my $newPos;
143 if ($whence == 0) {
144 $newPos = $num; # from start of file
145 } elsif ($whence == 1) {
146 $newPos = $num + $self->{POS}; # relative to current position
147 } else {
148 $self->Slurp(); # read whole file into buffer
149 $newPos = $num + $self->{LEN}; # relative to end of file
150 }
151 if ($newPos >= 0) {
152 $self->{POS} = $newPos;
153 $rtnVal = 1;
154 }
155 } else {
156 $rtnVal = seek($self->{FILE_PT}, $num, $whence);
157 }
158 return $rtnVal;
159}
160
161#------------------------------------------------------------------------------
162# Read from the file
163# Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
164# Returns: Number of bytes read
165sub Read($$$)
166{
167 my $self = shift;
168 my $len = $_[1];
169 my $rtnVal;
170
171 # protect against reading too much at once
172 # (also from dying with a "Negative length" error)
173 if ($len & 0xf8000000) {
174 return 0 if $len < 0;
175 # read in smaller blocks because Windows attempts to pre-allocate
176 # memory for the full size, which can lead to an out-of-memory error
177 my $maxLen = 0x4000000; # (MUST be less than bitmask in "if" above)
178 my $num = Read($self, $_[0], $maxLen);
179 return $num if $num < $maxLen;
180 for (;;) {
181 $len -= $maxLen;
182 last if $len <= 0;
183 my $l = $len < $maxLen ? $len : $maxLen;
184 my $buff;
185 my $n = Read($self, $buff, $l);
186 last unless $n;
187 $_[0] .= $buff;
188 $num += $n;
189 last if $n < $l;
190 }
191 return $num;
192 }
193 # read through our buffer if necessary
194 if ($self->{TESTED} < 0) {
195 my $buff;
196 my $newPos = $self->{POS} + $len;
197 # number of bytes to read from file
198 my $num = $newPos - $self->{LEN};
199 if ($num > 0 and $self->{FILE_PT}) {
200 # read data from file in multiples of $CHUNK_SIZE
201 $num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
202 $num = read($self->{FILE_PT}, $buff, $num);
203 if ($num) {
204 ${$self->{BUFF_PT}} .= $buff;
205 $self->{LEN} += $num;
206 }
207 }
208 # number of bytes left in data buffer
209 $num = $self->{LEN} - $self->{POS};
210 if ($len <= $num) {
211 $rtnVal = $len;
212 } elsif ($num <= 0) {
213 $_[0] = '';
214 return 0;
215 } else {
216 $rtnVal = $num;
217 }
218 # return data from our buffer
219 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
220 $self->{POS} += $rtnVal;
221 } else {
222 # read directly from file
223 $_[0] = '' unless defined $_[0];
224 $rtnVal = read($self->{FILE_PT}, $_[0], $len) || 0;
225 }
226 if ($self->{DEBUG}) {
227 my $pos = $self->Tell() - $rtnVal;
228 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
229 $self->{DEBUG}->{$pos} = $rtnVal;
230 }
231 }
232 return $rtnVal;
233}
234
235#------------------------------------------------------------------------------
236# Read a line from file (end of line is $/)
237# Inputs: 0) reference to RandomAccess object, 1) buffer
238# Returns: Number of bytes read
239sub ReadLine($$)
240{
241 my $self = shift;
242 my $rtnVal;
243 my $fp = $self->{FILE_PT};
244
245 if ($self->{TESTED} < 0) {
246 my ($num, $buff);
247 my $pos = $self->{POS};
248 if ($fp) {
249 # make sure we have some data after the current position
250 while ($self->{LEN} <= $pos) {
251 $num = read($fp, $buff, $CHUNK_SIZE);
252 return 0 unless $num;
253 ${$self->{BUFF_PT}} .= $buff;
254 $self->{LEN} += $num;
255 }
256 # scan and read until we find the EOL (or hit EOF)
257 for (;;) {
258 $pos = index(${$self->{BUFF_PT}}, $/, $pos);
259 if ($pos >= 0) {
260 $pos += length($/);
261 last;
262 }
263 $pos = $self->{LEN}; # have scanned to end of buffer
264 $num = read($fp, $buff, $CHUNK_SIZE) or last;
265 ${$self->{BUFF_PT}} .= $buff;
266 $self->{LEN} += $num;
267 }
268 } else {
269 # string i/o
270 $pos = index(${$self->{BUFF_PT}}, $/, $pos);
271 if ($pos < 0) {
272 $pos = $self->{LEN};
273 $self->{POS} = $pos if $self->{POS} > $pos;
274 } else {
275 $pos += length($/);
276 }
277 }
278 # read the line from our buffer
279 $rtnVal = $pos - $self->{POS};
280 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
281 $self->{POS} = $pos;
282 } else {
283 $_[0] = <$fp>;
284 if (defined $_[0]) {
285 $rtnVal = length($_[0]);
286 } else {
287 $rtnVal = 0;
288 }
289 }
290 if ($self->{DEBUG}) {
291 my $pos = $self->Tell() - $rtnVal;
292 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
293 $self->{DEBUG}->{$pos} = $rtnVal;
294 }
295 }
296 return $rtnVal;
297}
298
299#------------------------------------------------------------------------------
300# Read whole file into buffer (without changing read pointer)
301# Inputs: 0) reference to RandomAccess object
302sub Slurp($)
303{
304 my $self = shift;
305 my $fp = $self->{FILE_PT} || return;
306 # read whole file into buffer (in large chunks)
307 my ($buff, $num);
308 while (($num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS)) != 0) {
309 ${$self->{BUFF_PT}} .= $buff;
310 $self->{LEN} += $num;
311 }
312}
313
314
315#------------------------------------------------------------------------------
316# set binary mode
317# Inputs: 0) reference to RandomAccess object
318sub BinMode($)
319{
320 my $self = shift;
321 binmode($self->{FILE_PT}) if $self->{FILE_PT};
322}
323
324#------------------------------------------------------------------------------
325# close the file and free the buffer
326# Inputs: 0) reference to RandomAccess object
327sub Close($)
328{
329 my $self = shift;
330
331 if ($self->{DEBUG}) {
332 local $_;
333 if ($self->Seek(0,2)) {
334 $self->{DEBUG}->{$self->Tell()} = 0; # set EOF marker
335 my $last;
336 my $tot = 0;
337 my $bad = 0;
338 foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
339 my $pos = $_;
340 my $len = $self->{DEBUG}->{$_};
341 if (defined $last and $last < $pos) {
342 my $bytes = $pos - $last;
343 $tot += $bytes;
344 $self->Seek($last);
345 my $buff;
346 $self->Read($buff, $bytes);
347 my $warn = '';
348 if ($buff =~ /[^\0]/) {
349 $bad += ($pos - $last);
350 $warn = ' - NON-ZERO!';
351 }
352 printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
353 }
354 my $cur = $pos + $len;
355 $last = $cur unless defined $last and $last > $cur;
356 }
357 print "$tot bytes missed";
358 $bad and print ", $bad non-zero!";
359 print "\n";
360 } else {
361 warn "File::RandomAccess DEBUG not working (file already closed?)\n";
362 }
363 delete $self->{DEBUG};
364 }
365 # close the file
366 if ($self->{FILE_PT}) {
367 close($self->{FILE_PT});
368 delete $self->{FILE_PT};
369 }
370 # reset the buffer
371 my $emptyBuff = '';
372 $self->{BUFF_PT} = \$emptyBuff;
373 $self->{LEN} = 0;
374 $self->{POS} = 0;
375}
376
377#------------------------------------------------------------------------------
3781; # end
Note: See TracBrowser for help on using the repository browser.