[16844] | 1 | #------------------------------------------------------------------------------
|
---|
| 2 | # File: RandomAccess.pm
|
---|
| 3 | #
|
---|
| 4 | # Description: Buffer to support random access reading of sequential file
|
---|
| 5 | #
|
---|
[24107] | 6 | # Revisions: 02/11/2004 - P. Harvey Created
|
---|
| 7 | # 02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
|
---|
| 8 | # 11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
|
---|
| 9 | # 01/02/2005 - P. Harvey Added DEBUG code
|
---|
| 10 | # 01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
|
---|
| 11 | # multi-character EOL sequences
|
---|
| 12 | # 02/20/2006 - P. Harvey Fixed bug where seek past end of file could
|
---|
| 13 | # generate "substr outside string" warning
|
---|
| 14 | # 06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
|
---|
| 15 | # 11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
|
---|
| 16 | # 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
|
---|
| 17 | # scalar with a multi-character newline
|
---|
| 18 | # 01/24/2009 - PH Protect against reading too much at once
|
---|
[16844] | 19 | #
|
---|
| 20 | # Notes: Calls the normal file i/o routines unless SeekTest() fails, in
|
---|
| 21 | # which case the file is buffered in memory to allow random access.
|
---|
| 22 | # SeekTest() is called automatically when the object is created
|
---|
| 23 | # unless specified.
|
---|
| 24 | #
|
---|
| 25 | # May also be used for string i/o (just pass a scalar reference)
|
---|
| 26 | #
|
---|
[24107] | 27 | # Legal: Copyright (c) 2003-2010 Phil Harvey (phil at owl.phy.queensu.ca)
|
---|
[16844] | 28 | # This library is free software; you can redistribute it and/or
|
---|
| 29 | # modify it under the same terms as Perl itself.
|
---|
| 30 | #------------------------------------------------------------------------------
|
---|
| 31 |
|
---|
| 32 | package File::RandomAccess;
|
---|
| 33 |
|
---|
| 34 | use strict;
|
---|
| 35 | require 5.002;
|
---|
| 36 | require Exporter;
|
---|
| 37 |
|
---|
| 38 | use vars qw($VERSION @ISA @EXPORT_OK);
|
---|
[24107] | 39 | $VERSION = '1.10';
|
---|
[16844] | 40 | @ISA = qw(Exporter);
|
---|
| 41 |
|
---|
[24107] | 42 | sub Read($$$);
|
---|
| 43 |
|
---|
[16844] | 44 | # constants
|
---|
| 45 | my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
|
---|
| 46 | my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
|
---|
| 47 |
|
---|
| 48 | #------------------------------------------------------------------------------
|
---|
| 49 | # Create new RandomAccess object
|
---|
| 50 | # Inputs: 0) reference to RandomAccess object or RandomAccess class name
|
---|
| 51 | # 1) file reference or scalar reference
|
---|
| 52 | # 2) flag set if file is already random access (disables automatic SeekTest)
|
---|
| 53 | sub new($$;$)
|
---|
| 54 | {
|
---|
| 55 | my ($that, $filePt, $isRandom) = @_;
|
---|
| 56 | my $class = ref($that) || $that;
|
---|
| 57 | my $self;
|
---|
| 58 |
|
---|
| 59 | if (ref $filePt eq 'SCALAR') {
|
---|
| 60 | # string i/o
|
---|
| 61 | $self = {
|
---|
| 62 | BUFF_PT => $filePt,
|
---|
| 63 | POS => 0,
|
---|
| 64 | LEN => length($$filePt),
|
---|
| 65 | TESTED => -1,
|
---|
| 66 | };
|
---|
| 67 | bless $self, $class;
|
---|
| 68 | } else {
|
---|
| 69 | # file i/o
|
---|
| 70 | my $buff = '';
|
---|
[24107] | 71 | $self = {
|
---|
[16844] | 72 | FILE_PT => $filePt, # file pointer
|
---|
| 73 | BUFF_PT => \$buff, # reference to file data
|
---|
| 74 | POS => 0, # current position in file
|
---|
| 75 | LEN => 0, # data length
|
---|
| 76 | TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
|
---|
| 77 | };
|
---|
| 78 | bless $self, $class;
|
---|
| 79 | $self->SeekTest() unless $isRandom;
|
---|
| 80 | }
|
---|
| 81 | return $self;
|
---|
| 82 | }
|
---|
| 83 |
|
---|
| 84 | #------------------------------------------------------------------------------
|
---|
| 85 | # Enable DEBUG code
|
---|
| 86 | # Inputs: 0) reference to RandomAccess object
|
---|
| 87 | sub Debug($)
|
---|
| 88 | {
|
---|
| 89 | my $self = shift;
|
---|
| 90 | $self->{DEBUG} = { };
|
---|
| 91 | }
|
---|
| 92 |
|
---|
| 93 | #------------------------------------------------------------------------------
|
---|
| 94 | # Perform seek test and turn on buffering if necessary
|
---|
| 95 | # Inputs: 0) reference to RandomAccess object
|
---|
| 96 | # Returns: 1 if seek test passed (ie. no buffering required)
|
---|
| 97 | # Notes: Must be done before any other i/o
|
---|
| 98 | sub SeekTest($)
|
---|
| 99 | {
|
---|
| 100 | my $self = shift;
|
---|
| 101 | unless ($self->{TESTED}) {
|
---|
| 102 | my $fp = $self->{FILE_PT};
|
---|
| 103 | if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
|
---|
| 104 | $self->{TESTED} = 1; # test passed
|
---|
| 105 | } else {
|
---|
| 106 | $self->{TESTED} = -1; # test failed (requires buffering)
|
---|
| 107 | }
|
---|
| 108 | }
|
---|
| 109 | return $self->{TESTED} == 1 ? 1 : 0;
|
---|
| 110 | }
|
---|
| 111 |
|
---|
| 112 | #------------------------------------------------------------------------------
|
---|
| 113 | # Get current position in file
|
---|
| 114 | # Inputs: 0) reference to RandomAccess object
|
---|
| 115 | # Returns: current position in file
|
---|
| 116 | sub Tell($)
|
---|
| 117 | {
|
---|
| 118 | my $self = shift;
|
---|
| 119 | my $rtnVal;
|
---|
| 120 | if ($self->{TESTED} < 0) {
|
---|
| 121 | $rtnVal = $self->{POS};
|
---|
| 122 | } else {
|
---|
| 123 | $rtnVal = tell($self->{FILE_PT});
|
---|
| 124 | }
|
---|
| 125 | return $rtnVal;
|
---|
| 126 | }
|
---|
| 127 |
|
---|
| 128 | #------------------------------------------------------------------------------
|
---|
| 129 | # Seek to position in file
|
---|
| 130 | # Inputs: 0) reference to RandomAccess object
|
---|
| 131 | # 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
|
---|
| 132 | # Returns: 1 on success
|
---|
| 133 | # Notes: When buffered, this doesn't quite behave like seek() since it will return
|
---|
| 134 | # success even if you seek outside the limits of the file. However if you
|
---|
| 135 | # do this, you will get an error on your next Read().
|
---|
| 136 | sub Seek($$;$)
|
---|
| 137 | {
|
---|
| 138 | my ($self, $num, $whence) = @_;
|
---|
| 139 | $whence = 0 unless defined $whence;
|
---|
| 140 | my $rtnVal;
|
---|
| 141 | if ($self->{TESTED} < 0) {
|
---|
| 142 | my $newPos;
|
---|
| 143 | if ($whence == 0) {
|
---|
| 144 | $newPos = $num; # from start of file
|
---|
| 145 | } elsif ($whence == 1) {
|
---|
| 146 | $newPos = $num + $self->{POS}; # relative to current position
|
---|
| 147 | } else {
|
---|
| 148 | $self->Slurp(); # read whole file into buffer
|
---|
| 149 | $newPos = $num + $self->{LEN}; # relative to end of file
|
---|
| 150 | }
|
---|
| 151 | if ($newPos >= 0) {
|
---|
| 152 | $self->{POS} = $newPos;
|
---|
| 153 | $rtnVal = 1;
|
---|
| 154 | }
|
---|
| 155 | } else {
|
---|
| 156 | $rtnVal = seek($self->{FILE_PT}, $num, $whence);
|
---|
| 157 | }
|
---|
| 158 | return $rtnVal;
|
---|
| 159 | }
|
---|
| 160 |
|
---|
| 161 | #------------------------------------------------------------------------------
|
---|
| 162 | # Read from the file
|
---|
[24107] | 163 | # Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
|
---|
[16844] | 164 | # Returns: Number of bytes read
|
---|
| 165 | sub Read($$$)
|
---|
| 166 | {
|
---|
| 167 | my $self = shift;
|
---|
| 168 | my $len = $_[1];
|
---|
| 169 | my $rtnVal;
|
---|
| 170 |
|
---|
[24107] | 171 | # protect against reading too much at once
|
---|
| 172 | # (also from dying with a "Negative length" error)
|
---|
| 173 | if ($len & 0xf8000000) {
|
---|
| 174 | return 0 if $len < 0;
|
---|
| 175 | # read in smaller blocks because Windows attempts to pre-allocate
|
---|
| 176 | # memory for the full size, which can lead to an out-of-memory error
|
---|
| 177 | my $maxLen = 0x4000000; # (MUST be less than bitmask in "if" above)
|
---|
| 178 | my $num = Read($self, $_[0], $maxLen);
|
---|
| 179 | return $num if $num < $maxLen;
|
---|
| 180 | for (;;) {
|
---|
| 181 | $len -= $maxLen;
|
---|
| 182 | last if $len <= 0;
|
---|
| 183 | my $l = $len < $maxLen ? $len : $maxLen;
|
---|
| 184 | my $buff;
|
---|
| 185 | my $n = Read($self, $buff, $l);
|
---|
| 186 | last unless $n;
|
---|
| 187 | $_[0] .= $buff;
|
---|
| 188 | $num += $n;
|
---|
| 189 | last if $n < $l;
|
---|
| 190 | }
|
---|
| 191 | return $num;
|
---|
| 192 | }
|
---|
| 193 | # read through our buffer if necessary
|
---|
[16844] | 194 | if ($self->{TESTED} < 0) {
|
---|
| 195 | my $buff;
|
---|
| 196 | my $newPos = $self->{POS} + $len;
|
---|
| 197 | # number of bytes to read from file
|
---|
| 198 | my $num = $newPos - $self->{LEN};
|
---|
| 199 | if ($num > 0 and $self->{FILE_PT}) {
|
---|
| 200 | # read data from file in multiples of $CHUNK_SIZE
|
---|
| 201 | $num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
|
---|
| 202 | $num = read($self->{FILE_PT}, $buff, $num);
|
---|
| 203 | if ($num) {
|
---|
| 204 | ${$self->{BUFF_PT}} .= $buff;
|
---|
| 205 | $self->{LEN} += $num;
|
---|
| 206 | }
|
---|
| 207 | }
|
---|
| 208 | # number of bytes left in data buffer
|
---|
| 209 | $num = $self->{LEN} - $self->{POS};
|
---|
| 210 | if ($len <= $num) {
|
---|
| 211 | $rtnVal = $len;
|
---|
| 212 | } elsif ($num <= 0) {
|
---|
| 213 | $_[0] = '';
|
---|
| 214 | return 0;
|
---|
| 215 | } else {
|
---|
| 216 | $rtnVal = $num;
|
---|
| 217 | }
|
---|
| 218 | # return data from our buffer
|
---|
| 219 | $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
|
---|
| 220 | $self->{POS} += $rtnVal;
|
---|
| 221 | } else {
|
---|
[24107] | 222 | # read directly from file
|
---|
[16844] | 223 | $_[0] = '' unless defined $_[0];
|
---|
| 224 | $rtnVal = read($self->{FILE_PT}, $_[0], $len) || 0;
|
---|
| 225 | }
|
---|
| 226 | if ($self->{DEBUG}) {
|
---|
| 227 | my $pos = $self->Tell() - $rtnVal;
|
---|
| 228 | unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
|
---|
| 229 | $self->{DEBUG}->{$pos} = $rtnVal;
|
---|
| 230 | }
|
---|
[24107] | 231 | }
|
---|
[16844] | 232 | return $rtnVal;
|
---|
| 233 | }
|
---|
| 234 |
|
---|
| 235 | #------------------------------------------------------------------------------
|
---|
| 236 | # Read a line from file (end of line is $/)
|
---|
| 237 | # Inputs: 0) reference to RandomAccess object, 1) buffer
|
---|
| 238 | # Returns: Number of bytes read
|
---|
| 239 | sub ReadLine($$)
|
---|
| 240 | {
|
---|
| 241 | my $self = shift;
|
---|
| 242 | my $rtnVal;
|
---|
| 243 | my $fp = $self->{FILE_PT};
|
---|
[24107] | 244 |
|
---|
[16844] | 245 | if ($self->{TESTED} < 0) {
|
---|
| 246 | my ($num, $buff);
|
---|
| 247 | my $pos = $self->{POS};
|
---|
| 248 | if ($fp) {
|
---|
| 249 | # make sure we have some data after the current position
|
---|
| 250 | while ($self->{LEN} <= $pos) {
|
---|
| 251 | $num = read($fp, $buff, $CHUNK_SIZE);
|
---|
| 252 | return 0 unless $num;
|
---|
| 253 | ${$self->{BUFF_PT}} .= $buff;
|
---|
| 254 | $self->{LEN} += $num;
|
---|
| 255 | }
|
---|
| 256 | # scan and read until we find the EOL (or hit EOF)
|
---|
| 257 | for (;;) {
|
---|
[24107] | 258 | $pos = index(${$self->{BUFF_PT}}, $/, $pos);
|
---|
| 259 | if ($pos >= 0) {
|
---|
| 260 | $pos += length($/);
|
---|
| 261 | last;
|
---|
| 262 | }
|
---|
[16844] | 263 | $pos = $self->{LEN}; # have scanned to end of buffer
|
---|
| 264 | $num = read($fp, $buff, $CHUNK_SIZE) or last;
|
---|
| 265 | ${$self->{BUFF_PT}} .= $buff;
|
---|
| 266 | $self->{LEN} += $num;
|
---|
| 267 | }
|
---|
| 268 | } else {
|
---|
| 269 | # string i/o
|
---|
[24107] | 270 | $pos = index(${$self->{BUFF_PT}}, $/, $pos);
|
---|
| 271 | if ($pos < 0) {
|
---|
| 272 | $pos = $self->{LEN};
|
---|
| 273 | $self->{POS} = $pos if $self->{POS} > $pos;
|
---|
| 274 | } else {
|
---|
| 275 | $pos += length($/);
|
---|
| 276 | }
|
---|
[16844] | 277 | }
|
---|
| 278 | # read the line from our buffer
|
---|
| 279 | $rtnVal = $pos - $self->{POS};
|
---|
| 280 | $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
|
---|
| 281 | $self->{POS} = $pos;
|
---|
| 282 | } else {
|
---|
| 283 | $_[0] = <$fp>;
|
---|
| 284 | if (defined $_[0]) {
|
---|
| 285 | $rtnVal = length($_[0]);
|
---|
| 286 | } else {
|
---|
| 287 | $rtnVal = 0;
|
---|
| 288 | }
|
---|
| 289 | }
|
---|
| 290 | if ($self->{DEBUG}) {
|
---|
| 291 | my $pos = $self->Tell() - $rtnVal;
|
---|
| 292 | unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
|
---|
| 293 | $self->{DEBUG}->{$pos} = $rtnVal;
|
---|
| 294 | }
|
---|
[24107] | 295 | }
|
---|
| 296 | return $rtnVal;
|
---|
[16844] | 297 | }
|
---|
| 298 |
|
---|
| 299 | #------------------------------------------------------------------------------
|
---|
| 300 | # Read whole file into buffer (without changing read pointer)
|
---|
| 301 | # Inputs: 0) reference to RandomAccess object
|
---|
| 302 | sub Slurp($)
|
---|
| 303 | {
|
---|
| 304 | my $self = shift;
|
---|
| 305 | my $fp = $self->{FILE_PT} || return;
|
---|
| 306 | # read whole file into buffer (in large chunks)
|
---|
| 307 | my ($buff, $num);
|
---|
| 308 | while (($num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS)) != 0) {
|
---|
| 309 | ${$self->{BUFF_PT}} .= $buff;
|
---|
| 310 | $self->{LEN} += $num;
|
---|
| 311 | }
|
---|
| 312 | }
|
---|
| 313 |
|
---|
| 314 |
|
---|
| 315 | #------------------------------------------------------------------------------
|
---|
| 316 | # set binary mode
|
---|
| 317 | # Inputs: 0) reference to RandomAccess object
|
---|
| 318 | sub BinMode($)
|
---|
| 319 | {
|
---|
| 320 | my $self = shift;
|
---|
| 321 | binmode($self->{FILE_PT}) if $self->{FILE_PT};
|
---|
| 322 | }
|
---|
| 323 |
|
---|
| 324 | #------------------------------------------------------------------------------
|
---|
| 325 | # close the file and free the buffer
|
---|
| 326 | # Inputs: 0) reference to RandomAccess object
|
---|
| 327 | sub Close($)
|
---|
| 328 | {
|
---|
| 329 | my $self = shift;
|
---|
[24107] | 330 |
|
---|
[16844] | 331 | if ($self->{DEBUG}) {
|
---|
| 332 | local $_;
|
---|
| 333 | if ($self->Seek(0,2)) {
|
---|
| 334 | $self->{DEBUG}->{$self->Tell()} = 0; # set EOF marker
|
---|
| 335 | my $last;
|
---|
| 336 | my $tot = 0;
|
---|
| 337 | my $bad = 0;
|
---|
| 338 | foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
|
---|
| 339 | my $pos = $_;
|
---|
| 340 | my $len = $self->{DEBUG}->{$_};
|
---|
| 341 | if (defined $last and $last < $pos) {
|
---|
| 342 | my $bytes = $pos - $last;
|
---|
| 343 | $tot += $bytes;
|
---|
| 344 | $self->Seek($last);
|
---|
| 345 | my $buff;
|
---|
| 346 | $self->Read($buff, $bytes);
|
---|
| 347 | my $warn = '';
|
---|
| 348 | if ($buff =~ /[^\0]/) {
|
---|
| 349 | $bad += ($pos - $last);
|
---|
| 350 | $warn = ' - NON-ZERO!';
|
---|
| 351 | }
|
---|
| 352 | printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
|
---|
| 353 | }
|
---|
| 354 | my $cur = $pos + $len;
|
---|
| 355 | $last = $cur unless defined $last and $last > $cur;
|
---|
| 356 | }
|
---|
| 357 | print "$tot bytes missed";
|
---|
| 358 | $bad and print ", $bad non-zero!";
|
---|
| 359 | print "\n";
|
---|
| 360 | } else {
|
---|
| 361 | warn "File::RandomAccess DEBUG not working (file already closed?)\n";
|
---|
| 362 | }
|
---|
| 363 | delete $self->{DEBUG};
|
---|
| 364 | }
|
---|
| 365 | # close the file
|
---|
| 366 | if ($self->{FILE_PT}) {
|
---|
| 367 | close($self->{FILE_PT});
|
---|
| 368 | delete $self->{FILE_PT};
|
---|
| 369 | }
|
---|
| 370 | # reset the buffer
|
---|
| 371 | my $emptyBuff = '';
|
---|
| 372 | $self->{BUFF_PT} = \$emptyBuff;
|
---|
| 373 | $self->{LEN} = 0;
|
---|
| 374 | $self->{POS} = 0;
|
---|
| 375 | }
|
---|
| 376 |
|
---|
| 377 | #------------------------------------------------------------------------------
|
---|
| 378 | 1; # end
|
---|