source: trunk/gsdl/perllib/plugins/MARCPlug.pm@ 3508

Last change on this file since 3508 was 3508, checked in by jrm21, 22 years ago

modified copyright statement

  • Property svn:keywords set to Author Date Id Revision
File size: 5.9 KB
Line 
1###########################################################################
2#
3# MARCPlug.pm -- basic MARC plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2002 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package MARCPlug;
28
29use SplitPlug;
30
31use unicode;
32use util;
33use parsargv;
34
# Compile-time setup: make MARCPlug a subclass of SplitPlug and put the
# perllib/cpan directory under $GSDLHOME at the front of @INC, so that
# modules kept there are found first by later "use" statements.
BEGIN {
    our @ISA = qw(SplitPlug);
    unshift @INC, "$ENV{'GSDLHOME'}/perllib/cpan";
}
39
40use MARC::Record;
41use MARC::Batch;
42
# Print the plugin's usage message to STDERR.
#
# The option is spelled "-metadata_map" because that is the name the
# constructor actually hands to parsargv::parse; the help text used to
# advertise a different spelling from the one that is parsed.
sub print_usage {
    print STDERR "\n usage: plugin MARCPlug [options]\n\n";
    print STDERR " options:\n";
    print STDERR
" -metadata_map Name of file that includes mapping details from MARC
 values to Greenstone metadata names. Defaults to
 'marctodc.txt' found in the site's etc directory.\n\n";
}
51
# Constructor.
#
# Builds the underlying SplitPlug object from the class name and the
# remaining arguments, then asks parsargv to pick this plugin's own option
# out of the same argument list:
#
#   metadata_map   mapping file name (default 'marctodc.txt'); stored in
#                  $self->{'mm_file'} and resolved against the etc
#                  directory later on.
#
# The "allow_extra_options" flag is passed to the parser (presumably so
# that options meant for the parent class are not rejected -- confirm in
# parsargv).  If parsing fails, an error and the usage text are printed to
# STDERR and the constructor dies.
sub new {
    my $class = shift (@_);
    my $self = SplitPlug->new($class, @_);

    my $mapping_file;
    my $parsed_ok = parsargv::parse(\@_,
                                    q^metadata_map/.*/marctodc.txt^, \$mapping_file,
                                    "allow_extra_options");

    unless ($parsed_ok) {
        print STDERR "\nIncorrect options passed to MARCPlug, check your collect.cfg configuration file\n";
        &print_usage();
        die "\n";
    }

    $self->{'mm_file'} = $mapping_file; # relative to etc dir

    return bless $self, $class;
}
72
# Load the MARC -> Greenstone metadata mapping file, then run the parent
# class's init().
#
# Arguments (after $self): $verbosity, $outhandle, $failhandle.  All three
# are passed on unchanged to SUPER::init, whose return value is returned.
#
# The mapping file is $GSDLHOME/etc/<mm_file>.  Each useful line has the form
#     <digits> -> <word>
# e.g. "245 -> Title".  Lines starting with '#' and blank lines are
# ignored; anything else is reported on $outhandle as a parse error.
# Trailing whitespace (including the CR of a DOS-format file) is tolerated.
#
# Results:
#   $self->{'metadata_mapping'}  array ref of { 'marc' => ..., 'gsdl' => ... }
#   $self->{'mm_file'}           set to undef when the file does not exist,
#                                which process() checks for.
sub init {
    my $self = shift (@_);
    my ($verbosity, $outhandle, $failhandle) = @_;

    my @metadata_mapping = ();

    # read in the metadata mapping file
    my $mm_file =
        util::filename_cat( $ENV{'GSDLHOME'}, "etc", $self->{'mm_file'} );

    if (!-e $mm_file)
    {
        my $msg = "MARCPlug ERROR: Can't locate mapping file \"" .
            $self->{'mm_file'} . "\".\n This file should be at $mm_file\n" .
            " No marc files can be processed.\n";

        print $outhandle $msg;
        print $failhandle $msg;
        $self->{'mm_file'} = undef;
        # We pick up the error in process() if there is no $mm_file
        # If we exit here, then pluginfo.pl will exit too!
    }
    # Three-argument open on a lexical handle: the file name can never be
    # mistaken for an open mode, and the handle is private to this call.
    elsif (open(my $mm_in, '<', $mm_file))
    {
        my $line_num = 1;
        while (defined(my $line = <$mm_in>))
        {
            chomp $line;
            # \s* before the end anchor accepts trailing blanks and the
            # "\r" left behind by chomp on CRLF-terminated files.
            if ($line =~ m/^(\d+)\s*->\s*(\w+)\s*$/)
            {
                my $marc_info = $1;
                my $gsdl_info = $2;
                my $mapping = { 'marc' => $marc_info, 'gsdl' => $gsdl_info };
                push(@metadata_mapping, $mapping);
            }
            elsif ($line !~ m/^\#/         # allow comments (# in first column)
                   && $line !~ m/^\s*$/)   # allow blank lines
            {
                print $outhandle "Parse error on line $line_num of $mm_file:\n";
                print $outhandle " \"$line\"\n";
            }
            $line_num++;
        }
        close($mm_in);
    }
    else
    {
        print STDERR "Unable to open $mm_file: $!\n";
    }

    $self->{'metadata_mapping'} = \@metadata_mapping;

    $self->SUPER::init(@_);
}
129
130
# Default pattern (as a string) for the file names this plugin handles:
# anything ending in ".marc", matched case-insensitively.
sub get_default_process_exp {
    my ($self) = @_;

    my $marc_file_exp = q^(?i)(\.marc)$^;
    return $marc_file_exp;
}
136
137
# Default pattern (as a string) used to split a file's text into records:
# a line break, optional whitespace, then another line break.
sub get_default_split_exp {
    # \r\n for msdos eol, \n for unix
    my $blank_line_exp = q^\r?\n\s*\r?\n^;
    return $blank_line_exp;
}
142
143
144
# Read every record of a MARC file.
#
# Arguments (after $self): $filename, $encoding, $language, $textref.
# $encoding and $language are accepted but not used here.
#
# For each record returned by MARC::Batch, the formatted text followed by a
# blank line is appended to $$textref (the blank line is what the default
# split expression looks for), and the record object is queued in
# $self->{'marc_entries'} for process() to take one at a time.
#
# If the file is not readable, a message is written to the plugin's
# outhandle (when verbosity is non-zero) and nothing else is changed.
#
# The queue is a lexical array created per call.  It used to be an
# undeclared package variable, so all MARCPlug objects shared one array and
# a later read_file emptied the queue another object was still holding.
sub read_file {
    my $self = shift (@_);
    my ($filename, $encoding, $language, $textref) = @_;

    my @marc_entries = ();

    if (!-r $filename)
    {
        my $outhandle = $self->{'outhandle'};
        print $outhandle "Read permission denied for $filename\n" if $self->{'verbosity'};
        return;
    }

    my $batch = MARC::Batch->new( 'USMARC', $filename );
    while ( my $marc = $batch->next )
    {
        push(@marc_entries, $marc);
        $$textref .= $marc->as_formatted();
        $$textref .= "\n\n"; # for SplitPlug - see default_split_exp above...
    }

    $self->{'marc_entries'} = \@marc_entries;
}
168
169
170
# Plugin-specific processing of one document object.  Called once for each
# record that SplitPlug finds in a MARC file.
#
# Arguments (after $self):
#   $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj
# $pluginfo and $base_dir are accepted but not used here.
#
# Returns undef (after logging) when no mapping file is available, i.e.
# $self->{'mm_file'} is undefined; otherwise returns 1.
#
# The next queued MARC record is taken from $self->{'marc_entries'} and
# passed to extract_metadata; the record text in $$textref is then spaced,
# HTML-escaped, wrapped in <pre> and added to the document's top section.
sub process {
    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    my $out = $self->{'outhandle'};

    unless (defined $self->{'mm_file'}) {
        print $out "MARCPlug: no metadata file! Can't process $file\n";
        return undef;
    }

    if ($self->{'verbosity'} > 1) {
        print $out "MARCPlug: processing $file\n";
    }

    my $top_section = $doc_obj->get_top_section();

    # Records were queued in file order; take the one belonging to this text.
    my $queued_records = $self->{'marc_entries'};
    my $record = shift(@$queued_records);

    $self->extract_metadata($record, $metadata, $doc_obj, $top_section);

    # add spaces after the sub-field markers, for word boundaries
    $$textref =~ s/^(.{6} _\w)/$1 /gm;

    # make angle brackets safe for HTML display
    $$textref =~ s/</&lt;/g;
    $$textref =~ s/>/&gt;/g;

    if ($self->{'verbosity'} > 2) {
        print $out " Adding Marc Record:\n",substr($$textref,0,40), " ...\n";
    }

    # HTML formatting...
    $$textref = join('', "<pre>\n", $$textref, "</pre>\n");

    # add text to document object
    $doc_obj->add_utf8_text($top_section, $$textref);

    return 1;
}
209
210
211
# Copy mapped MARC fields of one record into the document's metadata.
#
# Arguments (after $self):
#   $marc      record object; must provide field($tag), returning the
#              matching field objects in list context
#   $metadata  accepted but not used here
#   $doc_obj   document object; must provide add_utf8_metadata
#   $section   section of $doc_obj that receives the metadata
#
# For every { 'marc' => tag, 'gsdl' => name } entry in
# $self->{'metadata_mapping'}, each field of the record with that tag is
# added (via as_string) under the Greenstone metadata name.
#
# The values are attached to $section.  The call used to name a variable
# that was never defined, so the section passed in was ignored.
sub extract_metadata
{
    my $self = shift (@_);
    my ($marc, $metadata, $doc_obj, $section) = @_;

    my $metadata_mapping = $self->{'metadata_mapping'};
    foreach my $mm ( @$metadata_mapping )
    {
        my $marc_field = $mm->{'marc'};
        my @metavalues = $marc->field($marc_field);
        next unless scalar(@metavalues) > 0;

        my $metaname = $mm->{'gsdl'};
        foreach my $metavalue ( @metavalues )
        {
            $doc_obj->add_utf8_metadata ($section, $metaname, $metavalue->as_string());
        }
    }
}
2361;
Note: See TracBrowser for help on using the repository browser.