source: gs2-extensions/parallel-building/trunk/src/perllib/cpan/MARC/Record.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

File size: 15.6 KB
Line 
1package MARC::Record;
2
3=head1 NAME
4
5MARC::Record - Perl extension for handling MARC records
6
7=cut
8
9use strict;
10use integer;
11eval 'use warnings' if $] >= 5.006;
12
13use vars qw( $ERROR );
14
15use MARC::Field;
16
17=head1 VERSION
18
19Version 1.10
20
21 $Id: Record.pm 3430 2002-09-24 05:17:39Z jrm21 $
22
23=cut
24
25use vars '$VERSION'; $VERSION = '1.10';
26
27use Exporter;
28use vars qw( @ISA @EXPORTS @EXPORT_OK );
29@ISA = qw( Exporter );
30@EXPORTS = qw();
31@EXPORT_OK = qw( LEADER_LEN );
32
33use vars qw( $DEBUG ); $DEBUG = 0;
34
35use constant LEADER_LEN => 24;
36
37=head1 DESCRIPTION
38
39Module for handling MARC records as objects. The file-handling stuff is
40in MARC::File::*.
41
42=head1 EXPORT
43
44None.
45
46=head1 ERROR HANDLING
47
48Any errors generated are stored in C<$MARC::Record::ERROR>.
49Warnings are kept with the record and accessible in the C<warnings()> method.
50
51=head1 METHODS
52
53=head2 new()
54
55Base constructor for the class. It just returns a completely empty record.
56To get real data, you'll need to populate it with fields, or use one of
57the MARC::File::* modules to read from a file.
58
59=cut
60
61sub new {
62 my $class = shift;
63 $class = ref($class) || $class; # Handle cloning
64 my $self = {
65 _leader => ' ' x 24,
66 _fields => [],
67 _warnings => [],
68 };
69 return bless $self, $class;
70} # new()
71
72
73
74
75=head2 fields()
76
77Returns a list of all the fields in the record. The list contains
78a MARC::Field object for each field in the record.
79
80=cut
81
82sub fields() {
83 my $self = shift;
84 return @{$self->{_fields}};
85}
86
87=head2 field(tagspec(s))
88
89Returns a list of tags that match the field specifier, or in scalar
90context, just the first matching tag.
91
92The field specifier can be a simple number (i.e. "245"), or use the "X"
93notation of wildcarding (i.e. subject tags are "6XX").
94
95=cut
96
97my %field_regex;
98
99sub field {
100 my $self = shift;
101 my @specs = @_;
102
103 my @list = ();
104 for my $tag ( @specs ) {
105 my $regex = $field_regex{ $tag };
106
107 # Compile & stash it if necessary
108 if ( not defined $regex ) {
109 my $pattern = $tag;
110 $pattern =~ s/X/\\d/g;
111 $regex = qr/^$pattern$/;
112 $field_regex{ $tag } = $regex;
113 } # not defined
114
115 for my $maybe ( $self->fields ) {
116 if ( $maybe->tag =~ $regex ) {
117 return $maybe unless wantarray;
118
119 push( @list, $maybe );
120 } # if
121 } # for $maybe
122 } # for $tag
123
124 return @list;
125}
126
127=head2 subfield(tag,subfield)
128
129Shortcut method for getting just a subfield for a tag. These are equivalent:
130
131 my $title = $marc->field(245)->subfield("a");
132 my $title = $marc->subfield(245,"a");
133
134If either the field or subfield can't be found, C<undef> is returned.
135
136=cut
137
138sub subfield($$) {
139 my $self = shift;
140 my $tag = shift;
141 my $subfield = shift;
142
143 my $field = $self->field($tag) or return undef;
144 return $field->subfield($subfield);
145} # subfield()
146
147=head2 insert_grouped_field(C<$field>)
148
149Will insert the specified MARC::Field object into the record in 'grouped
150order' and return true (1) on success, and false (undef) on failure.
151For example, if a '650' field is inserted with insert_grouped_field()
152it will be inserted at the end of the 6XX group of tags. After some discussion
153on the perl4lib list it was descided that this is ordinarily what you will
154want. If you would like to insert at a specific point in the record you can use
155insert_fields_after() and insert_fields_before() methods which are described
156below.
157
158=cut
159
160sub insert_grouped_field {
161 my ($self,$new) = @_;
162 _all_parms_are_fields($new) or return(_gripe('Argument must be MARC::Fieldobject'));
163
164 ## try to find the end of the field group and insert it there
165 my $limit = int($new->tag() / 100);
166 my $found = 0;
167 foreach my $field ($self->fields()) {
168 if ( int($field->tag() / 100) > $limit ) {
169 $self->insert_field_before($field,$new);
170 $found = 1;
171 last;
172 }
173 }
174
175 ## if we couldn't find the end of the group, then we must not have
176 ## any tags this high yet, so just append it
177 if (!$found) {
178 $self->append_fields($new);
179 }
180
181 return(1);
182
183}
184
185=for internal
186
187=cut
188
189sub _all_parms_are_fields {
190 for ( @_ ) {
191 return 0 unless ref($_) eq 'MARC::Field';
192 }
193 return 1;
194}
195
196=head2 append_field(C<@fields>)
197
198Appends the field specified by C<$field> to the end of the record.
199C<@fields> need to be MARC::Field objects.
200
201 my $field = MARC::Field->new('590','','','a' => 'My local note.');
202 $record->append_field($field);
203
204Returns the number of fields appended.
205
206=cut
207
208sub append_fields {
209 my $self = shift;
210
211 _all_parms_are_fields(@_) or return(_gripe('Arguments must be MARC::Field objects'));
212
213 push(@{ $self->{_fields} }, @_);
214 return scalar @_;
215}
216
217=head2 insert_fields_before($before_field,@new_fields)
218
219Inserts the field specified by C<$new_field> before the field C<$before_field>.
220Returns the number of fields inserted, or undef on failures.
221Both C<$before_field> and all C<@new_fields> need to be MARC::Field objects.
222
223 my $before_field = $record->field('260');
224 my $new_field = MARC::Field->new('250','','','a' => '2nd ed.');
225 $record->insert_fields_before($before_field,$new_field);
226
227=cut
228
229sub insert_fields_before {
230 my $self = shift;
231
232 _all_parms_are_fields(@_) or return(_gripe('All arguments must be MARC::Field objects'));
233 my ($before,@new) = @_;
234
235 ## find position of $before
236 my $fields = $self->{_fields};
237 my $pos = 0;
238 foreach my $f (@$fields) {
239 last if ($f == $before);
240 $pos++;
241 }
242
243 ## insert before $before
244 if ($pos >= @$fields) {
245 return(_gripe("Couldn't find field to insert before"));
246 }
247 splice(@$fields,$pos,0,@new);
248 return scalar @new;
249
250}
251
252=head2 insert_fields_after($after_field,@new_fields)
253
254Identical to L<insert_fields_before()>, but fields are added after C<$after_field>.
255
256=cut
257
258sub insert_fields_after {
259 my $self = shift;
260
261 _all_parms_are_fields(@_) or return(_gripe('All arguments must be MARC::Field objects'));
262 my ($after,@new) = @_;
263
264 ## find position of $after
265 my $fields = $self->{_fields};
266 my $pos = 0;
267 foreach my $f (@$fields) {
268 last if ($f == $after);
269 $pos++;
270 }
271
272 ## insert after $after
273 if ($pos+1 >= @$fields) {
274 return(_gripe("Couldn't find field to insert after"));
275 }
276 splice(@$fields,$pos+1,0,@new);
277 return scalar @new;
278}
279
280=head2 delete_field(C<$field>)
281
282Deletes a field from the record.
283
284The field must have been retrieved from the record using the
285C<field()> method. For example, to delete a 526 tag if it exists:
286
287 my $tag526 = $marc->field( "526" );
288 if ( $tag526 ) {
289 $marc->delete_field( $tag526 );
290 }
291
292C<delete_field()> returns the number of fields that were deleted.
293This shouldn't be 0 unless you didn't get the tag properly.
294
295=cut
296
297sub delete_field {
298 my $self = shift;
299 my $deleter = shift;
300 my $list = $self->{_fields};
301
302 my $old_count = @$list;
303 @$list = grep { $_ != $deleter } @$list;
304 return $old_count - @$list;
305}
306
307=head2 as_usmarc()
308
309This is a wrapper around C<MARC::File::USMARC::encode()> for compatibility with
310older versions of MARC::Record.
311
312=cut
313
314sub as_usmarc() {
315 my $self = shift;
316
317 require MARC::File::USMARC;
318
319 return MARC::File::USMARC::encode( $self );
320}
321
322=head2 as_formatted()
323
324Returns a pretty string for printing in a MARC dump.
325
326=cut
327
328sub as_formatted() {
329 my $self = shift;
330
331 my @lines = ( "LDR " . ($self->{_leader} || "") );
332 for my $field ( @{$self->{_fields}} ) {
333 push( @lines, $field->as_formatted() );
334 }
335
336 return join( "\n", @lines );
337} # as_formatted
338
339=head2 title()
340
341Returns the title from the 245 tag.
342Note that it is a string, not a MARC::Field record.
343
344=cut
345
346sub title() {
347 my $self = shift;
348
349 my $field = $self->field(245) or return "<no 245 tag found>";
350
351 return $field->as_string;
352}
353
354=head2 author()
355
356Returns the author from the 100, 110 or 111 tag.
357Note that it is a string, not a MARC::Field record.
358
359=cut
360
361sub author() {
362 my $self = shift;
363
364 for my $tag ( qw( 100 110 111 ) ) {
365 my $field = $self->field($tag);
366 return $field->as_string() if $field;
367 }
368
369 return "<No author tag found>";
370}
371
372=head2 leader([text])
373
374Returns the leader for the record. Sets the leader if I<text> is defined.
375No error checking is done on the validity of the leader.
376
377=cut
378
379sub leader {
380 my $self = shift;
381 my $text = shift;
382
383 if ( defined $text ) {
384 (length($text) eq 24)
385 or $self->_warn( "Leader must be 24 bytes long" );
386 $self->{_leader} = $text;
387 } # set the leader
388
389 return $self->{_leader};
390} # leader()
391
392=head2 set_leader_lengths( $reclen, $baseaddr )
393
394Internal function for updating the leader's length and base address.
395
396=cut
397
398sub set_leader_lengths {
399 my $self = shift;
400 my $reclen = shift;
401 my $baseaddr = shift;
402 substr($self->{_leader},0,5) = sprintf("%05d",$reclen);
403 substr($self->{_leader},12,5) = sprintf("%05d",$baseaddr);
404}
405
406=head2 clone( [field specs] )
407
408The C<clone()> method makes a copy of an existing MARC record and returns
409the new version. Note that you cannot just say:
410
411 my $newmarc = $oldmarc;
412
413This just makes a copy of the reference, not a new object. You must use
414the C<clone()> method like so:
415
416 my $newmarc = $oldmarc->clone;
417
418You can also specify field specs to filter down only a
419certain subset of fields. For instance, if you only wanted the
420title and ISBN tags from a record, you could do this:
421
422 my $small_marc = $marc->clone( 245, '020' );
423
424The order of the fields is preserved as it was in the original record.
425
426=cut
427
428sub clone {
429 my $self = shift;
430 my @keeper_tags = @_;
431
432 my $clone = $self->new();
433 $clone->{_leader} = $self->{_leader};
434
435 my $filtered = @keeper_tags ? [$self->field( @keeper_tags )] : undef;
436
437 for my $field ( $self->fields() ) {
438 if ( !$filtered || (grep {$field==$_} @$filtered ) ) {
439 $clone->add_fields( $field->clone );
440 }
441 }
442
443 # XXX FIX THIS $clone->update_leader();
444
445 return $clone;
446}
447
448=head2 warnings()
449
450Returns the warnings that were created when the record was read.
451These are things like "Invalid indicators converted to blanks".
452
453The warnings are items that you might be interested in, or might
454not. It depends on how stringently you're checking data. If
455you're doing some grunt data analysis, you probably don't care.
456
457A side effect of calling warnings() is that the warning buffer will
458be cleared.
459
460=cut
461
462sub warnings() {
463 my $self = shift;
464 my @warnings = @{$self->{_warnings}};
465 $self->{_warnings} = [];
466 return @warnings;
467}
468
469=head2 add_fields()
470
471C<add_fields()> is now deprecated, and users are encouraged to use C<append_field()>,
472C<insert_field_after()>, and C<insert_field_before()> since they do what you want
473probably. It is still here though, for backwards compatability.
474
475C<add_fields()> adds MARC::Field objects to the end of the list. Returns the
476number of fields added, or C<undef> if there was an error.
477
478There are three ways of calling C<add_fields()> to add data to the record.
479
480=over 4
481
482=item 1 Create a MARC::Field object and add it
483
484 my $author = MARC::Field->new(
485 100, "1", " ", a => "Arnosky, Jim."
486 );
487 $marc->add_fields( $author );
488
489=item 2 Add the data fields directly, and let C<add_fields()> take care of the objectifying.
490
491 $marc->add_fields(
492 245, "1", "0",
493 a => "Raccoons and ripe corn /",
494 c => "Jim Arnosky.",
495 );
496
497=item 3 Same as #2 above, but pass multiple fields of data in anonymous lists
498
499 $marc->add_fields(
500 [ 250, " ", " ", a => "1st ed." ],
501 [ 650, "1", " ", a => "Raccoons." ],
502 );
503
504=back
505
506=cut
507
508sub add_fields {
509 my $self = shift;
510
511 my $nfields = 0;
512 my $fields = $self->{_fields};
513
514 while ( my $parm = shift ) {
515 # User handed us a list of data (most common possibility)
516 if ( ref($parm) eq "" ) {
517 my $field = MARC::Field->new( $parm, @_ )
518 or return _gripe( $MARC::Field::ERROR );
519 push( @$fields, $field );
520 ++$nfields;
521 last; # Bail out, we're done eating parms
522
523 # User handed us an object.
524 } elsif ( ref($parm) eq "MARC::Field" ) {
525 push( @$fields, $parm );
526 ++$nfields;
527
528 # User handed us an anonymous list of parms
529 } elsif ( ref($parm) eq "ARRAY" ) {
530 my $field = MARC::Field->new(@$parm)
531 or return _gripe( $MARC::Field::ERROR );
532 push( @$fields, $field );
533 ++$nfields;
534
535 } else {
536 return _gripe( "Unknown parm of type", ref($parm), " passed to add_fields()" );
537 } # if
538
539 } # while
540
541 return $nfields;
542}
543
544=head2 new_from_usmarc( $marcblob )
545
546This is a wrapper around C<MARC::File::USMARC::decode()> for compatibility with
547older versions of MARC::Record.
548
549=cut
550
551sub new_from_usmarc {
552 my $blob = shift;
553 $blob = shift if (ref($blob) || ($blob eq "MARC::Record"));
554
555 require MARC::File::USMARC;
556
557 return MARC::File::USMARC::decode( $blob );
558}
559
560
561# NOTE: _warn is an object method
562sub _warn {
563 my $self = shift;
564
565 push( @{$self->{_warnings}}, join( "", @_ ) );
566}
567
568
569# NOTE: _gripe is NOT an object method
570sub _gripe {
571 $ERROR = join( "", @_ );
572
573 warn $ERROR;
574
575 return undef;
576}
577
578
5791;
580
581__END__
582
583=head1 DESIGN NOTES
584
585A brief discussion of why MARC::Record is done the way it is:
586
587=over 4
588
589=item * It's built for quick prototyping
590
591One of the areas Perl excels is in allowing the programmer to
592create easy solutions quickly. MARC::Record is designed along
593those same lines. You want a program to dump all the 6XX
594tags in a file? MARC::Record is your friend.
595
596=item * It's built for extensibility
597
598Currently, I'm using MARC::Record for analyzing bibliographic
599data, but who knows what might happen in the future? MARC::Record
600needs to be just as adept at authority data, too.
601
602=item * It's designed around accessor methods
603
604I use method calls everywhere, and I expect calling programs to do
605the same, rather than accessing internal data directly. If you
606access an object's hash fields on your own, future releases may
607break your code.
608
609=item * It's not built for speed
610
611One of the tradeoffs in using accessor methods is some overhead
612in the method calls. Is this slow? I don't know, I haven't measured.
613I would suggest that if you're a cycle junkie that you use
614Benchmark.pm to check to see where your bottlenecks are, and then
615decide if MARC::Record is for you.
616
617=back
618
619=head1 RELATED MODULES
620
621L<MARC::Record>, L<MARC::Lint>
622
623=head1 SEE ALSO
624
625=over 4
626
627=item * perl4lib (L<http://www.rice.edu/perl4lib/>)
628
629A mailing list devoted to the use of Perl in libraries.
630
631=item * Library Of Congress MARC pages (L<http://www.loc.gov/marc/>)
632
633The definitive source for all things MARC.
634
635
636=item * I<Understanding MARC Bibliographic> (L<http://lcweb.loc.gov/marc/umb/>)
637
638Online version of the free booklet. An excellent overview of the MARC format. Essential.
639
640
641=item * Tag Of The Month (L<http://www.tagofthemonth.com/>)
642
643Follett Software Company's
644(L<http://www.fsc.follett.com/>) monthly discussion of various MARC tags.
645
646=back
647
648=head1 TODO
649
650=over 4
651
652=item * Incorporate MARC.pm in the distribution.
653
654Combine MARC.pm and MARC::* into one distribution.
655
656=item * Podify MARC.pm
657
658=item * Allow regexes across the entire tag
659
660Imagine something like this:
661
662 my @sears_headings = $marc->tag_grep( /Sears/ );
663
664(from Mike O'Regan)
665
666=item * Insert a field in an arbitrary place in the record
667
668=item * Allow deleting a field
669
670 for my $field ( $record->field( "856" ) ) {
671 $record->delete_field( $field ) unless useful($field);
672 } # for
673
674(from Anne Highsmith [email protected])
675
676
677=item * Modifying an existing field
678
679=back
680
681=head1 IDEAS
682
683Ideas are things that have been considered, but nobody's actually asked for.
684
685=over 4
686
687=item * Create multiple output formats.
688
689These could be ASCII, XML, or MarcMaker.
690
691=back
692
693=head1 LICENSE
694
695This code may be distributed under the same terms as Perl itself.
696
697Please note that these modules are not products of or supported by the
698employers of the various contributors to the code.
699
700=head1 AUTHOR
701
702Andy Lester, E<lt>[email protected]<gt> or E<lt>[email protected]<gt>
703
704=cut
705
Note: See TracBrowser for help on using the repository browser.