source: main/tags/2.82/gsdl/perllib/cpan/MARC/File/USMARC.pm@ 32727

Last change on this file since 32727 was 3430, checked in by jrm21, 22 years ago

Added MARCPlug, mostly done by David Bainbridge. It needs a configuration file,
which maps MARC fields into metadata, which is in gsdlhome/etc/marctodc.txt

  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1package MARC::File::USMARC;
2
3=head1 NAME
4
5MARC::File::USMARC - USMARC-specific file handling
6
7=cut
8
9use strict;
10use integer;
11eval 'use bytes' if $] >= 5.006;
12eval 'use warnings' if $] >= 5.006;
13
14use vars qw( $ERROR );
15
16=head1 VERSION
17
18Version 1.10
19
20 $Id: USMARC.pm 3430 2002-09-24 05:17:39Z jrm21 $
21
22=cut
23
24use vars '$VERSION'; $VERSION = '1.10';
25
26use MARC::File;
27use vars qw( @ISA ); @ISA = qw( MARC::File );
28
29use MARC::Record qw( LEADER_LEN );
30use constant SUBFIELD_INDICATOR => "\x1F";
31use constant END_OF_FIELD => "\x1E";
32use constant END_OF_RECORD => "\x1D";
33use constant DIRECTORY_ENTRY_LEN => 12;
34
35=head1 SYNOPSIS
36
37 use MARC::File::USMARC;
38
39 my $file = MARC::File::USMARC::in( $filename );
40
41 while ( my $marc = $file->next() ) {
42 # Do something
43 }
44 $file->close();
45 undef $file;
46
47=head1 EXPORT
48
49None.
50
51=head1 METHODS
52
53=for internal
54
55Internal function to get the next raw record out of a file.
56
57=cut
58
59sub _next {
60 my $self = shift;
61
62 my $fh = $self->{fh};
63
64 my $reclen;
65 return undef if eof($fh);
66
67 local $/ = END_OF_RECORD;
68 my $usmarc = <$fh>;
69
70 if ( length($usmarc) < 5 ) {
71 $self->_warn( "Couldn't find record length" );
72 return $self->_next();
73 }
74
75 $reclen = substr($usmarc,0,5);
76
77 if ( $reclen !~ /^\d{5}$/ or $reclen != length($usmarc) ) {
78 $self->_warn( "Invalid record length \"$reclen\"" );
79 return $self->_next();
80 }
81
82 return $usmarc;
83}
84
85=head2 decode()
86
87Constructor for handling data from a USMARC file. This function takes care of all
88the tag directory parsing & mangling.
89
90Any warnings or coercions can be checked in the C<warnings()> function.
91
92=cut
93
94sub decode {
95 my $text = shift;
96 $text = shift if (ref($text)||$text) =~ /^MARC::File/;
97
98 my $marc = MARC::Record->new();
99
100 # Check for an all-numeric record length
101 ($text =~ /^(\d{5})/)
102 or return $marc->_warn( "Record length \"", substr( $text, 0, 5 ), "\" is not numeric" );
103
104 my $reclen = $1;
105 ($reclen == length($text))
106 or return $marc->_gripe( "Invalid record length: Leader says $reclen bytes, but it's actually ", length( $text ) );
107
108 $marc->leader( substr( $text, 0, LEADER_LEN ) );
109 my @fields = split( END_OF_FIELD, substr( $text, LEADER_LEN ) );
110 my $dir = shift @fields or return $marc->_warn( "No directory found" );
111
112 (length($dir) % 12 == 0)
113 or return $marc->_gripe( "Invalid directory length" );
114 my $nfields = length($dir)/12;
115
116 my $finalfield = pop @fields;
117 # Check for the record terminator, and ignore it
118 ($finalfield eq END_OF_RECORD)
119 or $marc->_gripe( "Invalid record terminator: \"$finalfield\"" );
120
121 # Walk thru the directories, and shift off the fields while we're at it
122 # Shouldn't be any non-digits anywhere in any directory entry
123 my @directory = unpack( "A3 A4 A5" x $nfields, $dir );
124 my @bad = grep /\D/, @directory;
125 if ( @bad ) {
126 return $marc->_gripe( "Non-numeric entries in the tag directory: ", join( ", ", map { "\"$_\"" } @bad ) );
127 }
128
129 my $databytesused = 0;
130 while ( @directory ) {
131 my $tagno = shift @directory;
132 my $len = shift @directory;
133 my $offset = shift @directory;
134 my $tagdata = shift @fields;
135 warn "Specs: ", join( "|", $tagno, $len, $offset, $tagdata ), "\n" if $MARC::Record::DEBUG;
136
137 # Check directory validity
138 ($tagno =~ /^\d\d\d$/)
139 or return $marc->_gripe( "Invalid field number in directory: \"$tagno\"" );
140
141 ($len == length($tagdata) + 1)
142 or $marc->_warn( "Invalid length in the directory for tag $tagno" );
143
144 ($offset == $databytesused)
145 or $marc->_warn( "Directory offsets are out of whack" );
146 $databytesused += $len;
147
148 if ( $tagno < 10 ) {
149 $marc->add_fields( $tagno, $tagdata )
150 or return undef; # We're relying on add_fields() having set $MARC::Record::ERROR
151 } else {
152 my @subfields = split( SUBFIELD_INDICATOR, $tagdata );
153 my $indicators = shift @subfields
154 or return $marc->_gripe( "No subfields found." );
155 my ($ind1,$ind2);
156 if ( $indicators =~ /^([0-9 ])([0-9 ])$/ ) {
157 ($ind1,$ind2) = ($1,$2);
158 } else {
159 $marc->_warn( "Invalid indicators \"$indicators\" forced to blanks\n" );
160 ($ind1,$ind2) = (" "," ");
161 }
162
163 # Split the subfield data into subfield name and data pairs
164 my @subfield_data = map { (substr($_,0,1),substr($_,1)) } @subfields;
165 $marc->add_fields( $tagno, $ind1, $ind2, @subfield_data )
166 or return undef;
167 }
168 } # while
169
170 # Once we're done, there shouldn't be any fields left over: They should all have shifted off.
171 (@fields == 0)
172 or return $marc->_gripe( "I've got leftover fields that weren't in the directory" );
173
174 return $marc;
175}
176
177=head2 update_leader()
178
179If any changes get made to the MARC record, the first 5 bytes of the
180leader (the length) will be invalid. This function updates the
181leader with the correct length of the record as it would be if
182written out to a file.
183
184=cut
185
186sub update_leader() {
187 my $self = shift;
188
189 my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory();
190
191 $self->_set_leader_lengths( $reclen, $baseaddress );
192}
193
194=head2 _build_tag_directory()
195
196Function for internal use only: Builds the tag directory that gets
197put in front of the data in a MARC record.
198
199Returns two array references, and two lengths: The tag directory, and the data fields themselves,
200the length of all data (including the Leader that we expect will be added),
201and the size of the Leader and tag directory.
202
203=cut
204
205sub _build_tag_directory {
206 my $marc = shift;
207 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
208 die "Wanted a MARC::Record but got a ", ref($marc) unless ref($marc) eq "MARC::Record";
209
210 my @fields;
211 my @directory;
212
213 my $dataend = 0;
214 for my $field ( $marc->fields() ) {
215 # Dump data into proper format
216 my $str = $field->as_usmarc;
217 push( @fields, $str );
218
219 # Create directory entry
220 my $len = length $str;
221 my $direntry = sprintf( "%03d%04d%05d", $field->tag, $len, $dataend );
222 push( @directory, $direntry );
223 $dataend += $len;
224 }
225
226 my $baseaddress =
227 LEADER_LEN + # better be 24
228 ( @directory * DIRECTORY_ENTRY_LEN ) +
229 # all the directory entries
230 1; # end-of-field marker
231
232
233 my $total =
234 $baseaddress + # stuff before first field
235 $dataend + # Length of the fields
236 1; # End-of-record marker
237
238
239
240 return (\@fields, \@directory, $total, $baseaddress);
241}
242
243=head2 encode()
244
245Returns a string of characters suitable for writing out to a USMARC file,
246including the leader, directory and all the fields.
247
248=cut
249
250sub encode() {
251 my $marc = shift;
252 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
253
254 my ($fields,$directory,$reclen,$baseaddress) = _build_tag_directory($marc);
255 $marc->set_leader_lengths( $reclen, $baseaddress );
256
257 # Glomp it all together
258 return join("",$marc->leader, @$directory, END_OF_FIELD, @$fields, END_OF_RECORD);
259}
260
2611;
262
263__END__
264
265=head1 RELATED MODULES
266
267L<MARC::Record>
268
269=head1 TODO
270
271Make some sort of autodispatch so that you don't have to explicitly
272specify the MARC::File::X subclass, sort of like how DBI knows to
273use DBD::Oracle or DBD::Mysql.
274
275=head1 LICENSE
276
277This code may be distributed under the same terms as Perl itself.
278
279Please note that these modules are not products of or supported by the
280employers of the various contributors to the code.
281
282=head1 AUTHOR
283
284Andy Lester, E<lt>[email protected]<gt> or E<lt>[email protected]<gt>
285
286=cut
287
Note: See TracBrowser for help on using the repository browser.