1 | package MARC::File::USMARC;
|
---|
2 |
|
---|
3 | =head1 NAME
|
---|
4 |
|
---|
5 | MARC::File::USMARC - USMARC-specific file handling
|
---|
6 |
|
---|
7 | =cut
|
---|
8 |
|
---|
9 | use strict;
|
---|
10 | use integer;
|
---|
11 | eval 'use bytes' if $] >= 5.006;
|
---|
12 | eval 'use warnings' if $] >= 5.006;
|
---|
13 |
|
---|
14 | use vars qw( $ERROR );
|
---|
15 |
|
---|
16 | =head1 VERSION
|
---|
17 |
|
---|
18 | Version 1.10
|
---|
19 |
|
---|
20 | $Id: USMARC.pm 3430 2002-09-24 05:17:39Z jrm21 $
|
---|
21 |
|
---|
22 | =cut
|
---|
23 |
|
---|
24 | use vars '$VERSION'; $VERSION = '1.10';
|
---|
25 |
|
---|
26 | use MARC::File;
|
---|
27 | use vars qw( @ISA ); @ISA = qw( MARC::File );
|
---|
28 |
|
---|
29 | use MARC::Record qw( LEADER_LEN );
|
---|
30 | use constant SUBFIELD_INDICATOR => "\x1F";
|
---|
31 | use constant END_OF_FIELD => "\x1E";
|
---|
32 | use constant END_OF_RECORD => "\x1D";
|
---|
33 | use constant DIRECTORY_ENTRY_LEN => 12;
|
---|
34 |
|
---|
35 | =head1 SYNOPSIS
|
---|
36 |
|
---|
37 | use MARC::File::USMARC;
|
---|
38 |
|
---|
39 | my $file = MARC::File::USMARC::in( $filename );
|
---|
40 |
|
---|
41 | while ( my $marc = $file->next() ) {
|
---|
42 | # Do something
|
---|
43 | }
|
---|
44 | $file->close();
|
---|
45 | undef $file;
|
---|
46 |
|
---|
47 | =head1 EXPORT
|
---|
48 |
|
---|
49 | None.
|
---|
50 |
|
---|
51 | =head1 METHODS
|
---|
52 |
|
---|
53 | =for internal
|
---|
54 |
|
---|
55 | Internal function to get the next raw record out of a file.
|
---|
56 |
|
---|
57 | =cut
|
---|
58 |
|
---|
59 | sub _next {
|
---|
60 | my $self = shift;
|
---|
61 |
|
---|
62 | my $fh = $self->{fh};
|
---|
63 |
|
---|
64 | my $reclen;
|
---|
65 | return undef if eof($fh);
|
---|
66 |
|
---|
67 | local $/ = END_OF_RECORD;
|
---|
68 | my $usmarc = <$fh>;
|
---|
69 |
|
---|
70 | if ( length($usmarc) < 5 ) {
|
---|
71 | $self->_warn( "Couldn't find record length" );
|
---|
72 | return $self->_next();
|
---|
73 | }
|
---|
74 |
|
---|
75 | $reclen = substr($usmarc,0,5);
|
---|
76 |
|
---|
77 | if ( $reclen !~ /^\d{5}$/ or $reclen != length($usmarc) ) {
|
---|
78 | $self->_warn( "Invalid record length \"$reclen\"" );
|
---|
79 | return $self->_next();
|
---|
80 | }
|
---|
81 |
|
---|
82 | return $usmarc;
|
---|
83 | }
|
---|
84 |
|
---|
85 | =head2 decode()
|
---|
86 |
|
---|
87 | Constructor for handling data from a USMARC file. This function takes care of all
|
---|
88 | the tag directory parsing & mangling.
|
---|
89 |
|
---|
90 | Any warnings or coercions can be checked in the C<warnings()> function.
|
---|
91 |
|
---|
92 | =cut
|
---|
93 |
|
---|
94 | sub decode {
|
---|
95 | my $text = shift;
|
---|
96 | $text = shift if (ref($text)||$text) =~ /^MARC::File/;
|
---|
97 |
|
---|
98 | my $marc = MARC::Record->new();
|
---|
99 |
|
---|
100 | # Check for an all-numeric record length
|
---|
101 | ($text =~ /^(\d{5})/)
|
---|
102 | or return $marc->_warn( "Record length \"", substr( $text, 0, 5 ), "\" is not numeric" );
|
---|
103 |
|
---|
104 | my $reclen = $1;
|
---|
105 | ($reclen == length($text))
|
---|
106 | or return $marc->_gripe( "Invalid record length: Leader says $reclen bytes, but it's actually ", length( $text ) );
|
---|
107 |
|
---|
108 | $marc->leader( substr( $text, 0, LEADER_LEN ) );
|
---|
109 | my @fields = split( END_OF_FIELD, substr( $text, LEADER_LEN ) );
|
---|
110 | my $dir = shift @fields or return $marc->_warn( "No directory found" );
|
---|
111 |
|
---|
112 | (length($dir) % 12 == 0)
|
---|
113 | or return $marc->_gripe( "Invalid directory length" );
|
---|
114 | my $nfields = length($dir)/12;
|
---|
115 |
|
---|
116 | my $finalfield = pop @fields;
|
---|
117 | # Check for the record terminator, and ignore it
|
---|
118 | ($finalfield eq END_OF_RECORD)
|
---|
119 | or $marc->_gripe( "Invalid record terminator: \"$finalfield\"" );
|
---|
120 |
|
---|
121 | # Walk thru the directories, and shift off the fields while we're at it
|
---|
122 | # Shouldn't be any non-digits anywhere in any directory entry
|
---|
123 | my @directory = unpack( "A3 A4 A5" x $nfields, $dir );
|
---|
124 | my @bad = grep /\D/, @directory;
|
---|
125 | if ( @bad ) {
|
---|
126 | return $marc->_gripe( "Non-numeric entries in the tag directory: ", join( ", ", map { "\"$_\"" } @bad ) );
|
---|
127 | }
|
---|
128 |
|
---|
129 | my $databytesused = 0;
|
---|
130 | while ( @directory ) {
|
---|
131 | my $tagno = shift @directory;
|
---|
132 | my $len = shift @directory;
|
---|
133 | my $offset = shift @directory;
|
---|
134 | my $tagdata = shift @fields;
|
---|
135 | warn "Specs: ", join( "|", $tagno, $len, $offset, $tagdata ), "\n" if $MARC::Record::DEBUG;
|
---|
136 |
|
---|
137 | # Check directory validity
|
---|
138 | ($tagno =~ /^\d\d\d$/)
|
---|
139 | or return $marc->_gripe( "Invalid field number in directory: \"$tagno\"" );
|
---|
140 |
|
---|
141 | ($len == length($tagdata) + 1)
|
---|
142 | or $marc->_warn( "Invalid length in the directory for tag $tagno" );
|
---|
143 |
|
---|
144 | ($offset == $databytesused)
|
---|
145 | or $marc->_warn( "Directory offsets are out of whack" );
|
---|
146 | $databytesused += $len;
|
---|
147 |
|
---|
148 | if ( $tagno < 10 ) {
|
---|
149 | $marc->add_fields( $tagno, $tagdata )
|
---|
150 | or return undef; # We're relying on add_fields() having set $MARC::Record::ERROR
|
---|
151 | } else {
|
---|
152 | my @subfields = split( SUBFIELD_INDICATOR, $tagdata );
|
---|
153 | my $indicators = shift @subfields
|
---|
154 | or return $marc->_gripe( "No subfields found." );
|
---|
155 | my ($ind1,$ind2);
|
---|
156 | if ( $indicators =~ /^([0-9 ])([0-9 ])$/ ) {
|
---|
157 | ($ind1,$ind2) = ($1,$2);
|
---|
158 | } else {
|
---|
159 | $marc->_warn( "Invalid indicators \"$indicators\" forced to blanks\n" );
|
---|
160 | ($ind1,$ind2) = (" "," ");
|
---|
161 | }
|
---|
162 |
|
---|
163 | # Split the subfield data into subfield name and data pairs
|
---|
164 | my @subfield_data = map { (substr($_,0,1),substr($_,1)) } @subfields;
|
---|
165 | $marc->add_fields( $tagno, $ind1, $ind2, @subfield_data )
|
---|
166 | or return undef;
|
---|
167 | }
|
---|
168 | } # while
|
---|
169 |
|
---|
170 | # Once we're done, there shouldn't be any fields left over: They should all have shifted off.
|
---|
171 | (@fields == 0)
|
---|
172 | or return $marc->_gripe( "I've got leftover fields that weren't in the directory" );
|
---|
173 |
|
---|
174 | return $marc;
|
---|
175 | }
|
---|
176 |
|
---|
177 | =head2 update_leader()
|
---|
178 |
|
---|
179 | If any changes get made to the MARC record, the first 5 bytes of the
|
---|
180 | leader (the length) will be invalid. This function updates the
|
---|
181 | leader with the correct length of the record as it would be if
|
---|
182 | written out to a file.
|
---|
183 |
|
---|
184 | =cut
|
---|
185 |
|
---|
186 | sub update_leader() {
|
---|
187 | my $self = shift;
|
---|
188 |
|
---|
189 | my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory();
|
---|
190 |
|
---|
191 | $self->_set_leader_lengths( $reclen, $baseaddress );
|
---|
192 | }
|
---|
193 |
|
---|
194 | =head2 _build_tag_directory()
|
---|
195 |
|
---|
196 | Function for internal use only: Builds the tag directory that gets
|
---|
197 | put in front of the data in a MARC record.
|
---|
198 |
|
---|
199 | Returns two array references, and two lengths: The tag directory, and the data fields themselves,
|
---|
200 | the length of all data (including the Leader that we expect will be added),
|
---|
201 | and the size of the Leader and tag directory.
|
---|
202 |
|
---|
203 | =cut
|
---|
204 |
|
---|
205 | sub _build_tag_directory {
|
---|
206 | my $marc = shift;
|
---|
207 | $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
|
---|
208 | die "Wanted a MARC::Record but got a ", ref($marc) unless ref($marc) eq "MARC::Record";
|
---|
209 |
|
---|
210 | my @fields;
|
---|
211 | my @directory;
|
---|
212 |
|
---|
213 | my $dataend = 0;
|
---|
214 | for my $field ( $marc->fields() ) {
|
---|
215 | # Dump data into proper format
|
---|
216 | my $str = $field->as_usmarc;
|
---|
217 | push( @fields, $str );
|
---|
218 |
|
---|
219 | # Create directory entry
|
---|
220 | my $len = length $str;
|
---|
221 | my $direntry = sprintf( "%03d%04d%05d", $field->tag, $len, $dataend );
|
---|
222 | push( @directory, $direntry );
|
---|
223 | $dataend += $len;
|
---|
224 | }
|
---|
225 |
|
---|
226 | my $baseaddress =
|
---|
227 | LEADER_LEN + # better be 24
|
---|
228 | ( @directory * DIRECTORY_ENTRY_LEN ) +
|
---|
229 | # all the directory entries
|
---|
230 | 1; # end-of-field marker
|
---|
231 |
|
---|
232 |
|
---|
233 | my $total =
|
---|
234 | $baseaddress + # stuff before first field
|
---|
235 | $dataend + # Length of the fields
|
---|
236 | 1; # End-of-record marker
|
---|
237 |
|
---|
238 |
|
---|
239 |
|
---|
240 | return (\@fields, \@directory, $total, $baseaddress);
|
---|
241 | }
|
---|
242 |
|
---|
243 | =head2 encode()
|
---|
244 |
|
---|
245 | Returns a string of characters suitable for writing out to a USMARC file,
|
---|
246 | including the leader, directory and all the fields.
|
---|
247 |
|
---|
248 | =cut
|
---|
249 |
|
---|
250 | sub encode() {
|
---|
251 | my $marc = shift;
|
---|
252 | $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
|
---|
253 |
|
---|
254 | my ($fields,$directory,$reclen,$baseaddress) = _build_tag_directory($marc);
|
---|
255 | $marc->set_leader_lengths( $reclen, $baseaddress );
|
---|
256 |
|
---|
257 | # Glomp it all together
|
---|
258 | return join("",$marc->leader, @$directory, END_OF_FIELD, @$fields, END_OF_RECORD);
|
---|
259 | }
|
---|
260 |
|
---|
261 | 1;
|
---|
262 |
|
---|
263 | __END__
|
---|
264 |
|
---|
265 | =head1 RELATED MODULES
|
---|
266 |
|
---|
267 | L<MARC::Record>
|
---|
268 |
|
---|
269 | =head1 TODO
|
---|
270 |
|
---|
271 | Make some sort of autodispatch so that you don't have to explicitly
|
---|
272 | specify the MARC::File::X subclass, sort of like how DBI knows to
|
---|
273 | use DBD::Oracle or DBD::Mysql.
|
---|
274 |
|
---|
275 | =head1 LICENSE
|
---|
276 |
|
---|
277 | This code may be distributed under the same terms as Perl itself.
|
---|
278 |
|
---|
279 | Please note that these modules are not products of or supported by the
|
---|
280 | employers of the various contributors to the code.
|
---|
281 |
|
---|
282 | =head1 AUTHOR
|
---|
283 |
|
---|
284 | Andy Lester, E<lt>[email protected]<gt> or E<lt>[email protected]<gt>
|
---|
285 |
|
---|
286 | =cut
|
---|
287 |
|
---|