[13486] | 1 | ###########################################################################
|
---|
| 2 | #
|
---|
| 3 | # MARCXMLPlug.pm
|
---|
| 4 | # A component of the Greenstone digital library software
|
---|
| 5 | # from the New Zealand Digital Library Project at the
|
---|
| 6 | # University of Waikato, New Zealand.
|
---|
| 7 | #
|
---|
| 8 | # Copyright (C) 2001 New Zealand Digital Library Project
|
---|
| 9 | #
|
---|
| 10 | # This program is free software; you can redistribute it and/or modify
|
---|
| 11 | # it under the terms of the GNU General Public License as published by
|
---|
| 12 | # the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | # (at your option) any later version.
|
---|
| 14 | #
|
---|
| 15 | # This program is distributed in the hope that it will be useful,
|
---|
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | # GNU General Public License for more details.
|
---|
| 19 | #
|
---|
| 20 | # You should have received a copy of the GNU General Public License
|
---|
| 21 | # along with this program; if not, write to the Free Software
|
---|
| 22 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | #
|
---|
| 24 | ###########################################################################
|
---|
| 25 |
|
---|
| 26 | # Processes MARCXML documents. Note that this plugin does no
|
---|
| 27 | # syntax checking (though the XML::Parser module tests for
|
---|
| 28 | # well-formedness).
|
---|
| 29 |
|
---|
| 30 | package MARCXMLPlug;
|
---|
| 31 |
|
---|
| 32 | use XMLPlug;
|
---|
| 33 |
|
---|
| 34 | use strict;
|
---|
| 35 | no strict 'refs'; # allow filehandles to be variables and viceversa
|
---|
| 36 |
|
---|
| 37 | sub BEGIN {
|
---|
| 38 | @MARCXMLPlug::ISA = ('XMLPlug');
|
---|
| 39 | }
|
---|
| 40 |
|
---|
| 41 | my $arguments = [{'name' => "metadata_mapping_file",
|
---|
| 42 | 'desc' => "{MARCXMLPlug.metadata_mapping_file}",
|
---|
| 43 | 'type' => "string",
|
---|
[15018] | 44 | 'deft' => "marctodc.txt",
|
---|
[13486] | 45 | 'reqd' => "no" }];
|
---|
| 46 |
|
---|
| 47 | my $options = { 'name' => "MARCXMLPlug",
|
---|
| 48 | 'desc' => "{MARCXMLPlug.desc}",
|
---|
| 49 | 'abstract' => "no",
|
---|
| 50 | 'inherits' => "yes",
|
---|
| 51 | 'args' => $arguments
|
---|
| 52 | };
|
---|
| 53 |
|
---|
| 54 | sub new {
|
---|
| 55 | my ($class) = shift (@_);
|
---|
| 56 | my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
|
---|
| 57 | push(@$pluginlist, $class);
|
---|
| 58 |
|
---|
| 59 | if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
|
---|
| 60 | if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
|
---|
| 61 |
|
---|
| 62 | my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
|
---|
| 63 |
|
---|
| 64 | $self->{'content'} = "";
|
---|
[15018] | 65 | $self->{'xmlcontent'} = "";
|
---|
[13486] | 66 | $self->{'record_count'} = 1;
|
---|
| 67 | $self->{'language'} = "";
|
---|
| 68 | $self->{'encoding'} = "";
|
---|
| 69 | $self->{'marc_mapping'} = {};
|
---|
| 70 | $self->{'current_code'} = "";
|
---|
| 71 | $self->{'current_tag'} = "";
|
---|
| 72 | $self->{'current_element'} = "";
|
---|
| 73 | $self->{'metadata_mapping'} = undef;
|
---|
| 74 | $self->{'num_processed'} = 0;
|
---|
[13496] | 75 | $self->{'indent'} = 0;
|
---|
| 76 |
|
---|
[13486] | 77 | return bless $self, $class;
|
---|
| 78 | }
|
---|
| 79 |
|
---|
| 80 | sub get_doctype {
|
---|
| 81 | my $self = shift(@_);
|
---|
| 82 |
|
---|
| 83 | return "collection";
|
---|
| 84 | }
|
---|
| 85 |
|
---|
| 86 |
|
---|
[15018] | 87 | sub _parse_marc_metadata_mapping
|
---|
| 88 | {
|
---|
| 89 | my $self = shift(@_);
|
---|
| 90 | my ($mm_file,$metadata_mapping) = @_;
|
---|
| 91 |
|
---|
| 92 | my $outhandle = $self->{'outhandle'};
|
---|
| 93 |
|
---|
| 94 | if (open(MMIN, "<$mm_file"))
|
---|
| 95 | {
|
---|
| 96 | my $l=0;
|
---|
| 97 | my $line;
|
---|
| 98 | while (defined($line=<MMIN>))
|
---|
| 99 | {
|
---|
| 100 | $l++;
|
---|
| 101 | chomp $line;
|
---|
| 102 | $line =~ s/#.*$//; # strip out any comments, including end of line
|
---|
| 103 | next if ($line =~ m/^\s*$/);
|
---|
| 104 | $line =~ s/\s+$//; # remove any white space at end of line
|
---|
| 105 |
|
---|
| 106 | my $parse_error_count = 0;
|
---|
| 107 | if ($line =~ m/^-(\d+)\s*$/) {
|
---|
| 108 | # special "remove" rule syntax
|
---|
| 109 | my $marc_info = $1;
|
---|
| 110 | if (defined $metadata_mapping->{$marc_info}) {
|
---|
| 111 | delete $metadata_mapping->{$marc_info};
|
---|
| 112 | }
|
---|
| 113 | else {
|
---|
| 114 | print $outhandle "Parse Warning: Did not file pre-existing rule $marc_info to remove";
|
---|
| 115 | print $outhandle " on line $l of $mm_file:\n";
|
---|
| 116 | print $outhandle " $line\n";
|
---|
| 117 | }
|
---|
| 118 | }
|
---|
| 119 | elsif ($line =~ m/^(.*?)->\s*([\w\^]+)$/)
|
---|
| 120 | {
|
---|
| 121 | my $lhs = $1;
|
---|
| 122 | my $gsdl_info = $2;
|
---|
| 123 |
|
---|
| 124 | my @fields = split(/,\s*/,$lhs);
|
---|
| 125 | my $f;
|
---|
| 126 | while ($f = shift (@fields)) {
|
---|
| 127 | $f =~ s/\s+$//; # remove any white space at end of line
|
---|
| 128 |
|
---|
| 129 | if ($f =~ m/^(\d+)\-(\d+)$/) {
|
---|
| 130 | # number range => genrate number in range and
|
---|
| 131 | # push on to array
|
---|
| 132 | push(@fields,$1..$2);
|
---|
| 133 | next;
|
---|
| 134 | }
|
---|
| 135 |
|
---|
| 136 | if ($f =~ m/^(\d+)((?:(?:\$|\^)\w)*)\s*$/) {
|
---|
| 137 |
|
---|
| 138 | my $marc_info = $1;
|
---|
| 139 | my $opt_sub_fields = $2;
|
---|
| 140 |
|
---|
| 141 | if ($opt_sub_fields ne "") {
|
---|
| 142 | my @sub_fields = split(/\$|\^/,$opt_sub_fields);
|
---|
| 143 | shift @sub_fields; # skip first entry, which is blank
|
---|
| 144 |
|
---|
| 145 | foreach my $sub_field (@sub_fields) {
|
---|
| 146 | $metadata_mapping->{$marc_info."\$".$sub_field} = $gsdl_info;
|
---|
| 147 | }
|
---|
| 148 | }
|
---|
| 149 | else {
|
---|
| 150 | # no subfields to worry about
|
---|
| 151 | $marc_info =~ s/\^/\$/;
|
---|
| 152 | $metadata_mapping->{$marc_info} = $gsdl_info;
|
---|
| 153 | }
|
---|
| 154 | }
|
---|
| 155 | else {
|
---|
| 156 | $parse_error_count++;
|
---|
| 157 | }
|
---|
| 158 | }
|
---|
| 159 | }
|
---|
| 160 | else
|
---|
| 161 | {
|
---|
| 162 | $parse_error_count++;
|
---|
| 163 | }
|
---|
| 164 |
|
---|
| 165 | if ($parse_error_count>0) {
|
---|
| 166 |
|
---|
| 167 | print $outhandle "Parse Error: $parse_error_count syntax error(s) on line $l of $mm_file:\n";
|
---|
| 168 | print $outhandle " $line\n";
|
---|
| 169 | }
|
---|
| 170 | }
|
---|
| 171 | close(MMIN);
|
---|
| 172 | }
|
---|
| 173 | else
|
---|
| 174 | {
|
---|
| 175 | print STDERR "Unable to open $mm_file: $!\n";
|
---|
| 176 | }
|
---|
| 177 | }
|
---|
| 178 |
|
---|
| 179 |
|
---|
| 180 | sub parse_marc_metadata_mapping
|
---|
| 181 | {
|
---|
| 182 | my $self = shift(@_);
|
---|
| 183 | my ($mm_file_or_files) = @_;
|
---|
| 184 |
|
---|
| 185 | my $metadata_mapping = {};
|
---|
| 186 |
|
---|
| 187 | if (ref ($mm_file_or_files) eq 'SCALAR') {
|
---|
| 188 | my $mm_file = $mm_file_or_files;
|
---|
| 189 | $self->_parse_marc_metadata_mapping($mm_file,$metadata_mapping);
|
---|
| 190 | }
|
---|
| 191 | else {
|
---|
| 192 | my $mm_files = $mm_file_or_files;
|
---|
| 193 |
|
---|
| 194 | # Need to process files in reverse order. This is so in the
|
---|
| 195 | # case where we have both a "collect" and "main" version,
|
---|
| 196 | # the "collect" one tops up the main one
|
---|
| 197 |
|
---|
| 198 | my $mm_file;
|
---|
| 199 | while ($mm_file = pop(@$mm_files)) {
|
---|
| 200 | $self->_parse_marc_metadata_mapping($mm_file,$metadata_mapping);
|
---|
| 201 | }
|
---|
| 202 | }
|
---|
| 203 |
|
---|
| 204 | return $metadata_mapping;
|
---|
| 205 | }
|
---|
| 206 |
|
---|
| 207 |
|
---|
[13486] | 208 | sub init {
|
---|
| 209 | my $self = shift (@_);
|
---|
| 210 | my ($verbosity, $outhandle, $failhandle) = @_;
|
---|
| 211 |
|
---|
| 212 | ## the mapping file has already been loaded
|
---|
| 213 | if (defined $self->{'metadata_mapping'} ){
|
---|
| 214 | $self->SUPER::init(@_);
|
---|
| 215 | return;
|
---|
| 216 | }
|
---|
| 217 |
|
---|
| 218 | # read in the metadata mapping file
|
---|
[15018] | 219 | my $mm_files = &util::locate_config_files($self->{'metadata_mapping_file'});
|
---|
[13486] | 220 |
|
---|
| 221 |
|
---|
[15018] | 222 | if (scalar(@$mm_files)==0)
|
---|
[13486] | 223 | {
|
---|
| 224 | my $msg = "MARCXMLPlug ERROR: Can't locate mapping file \"" .
|
---|
[15018] | 225 | $self->{'metadata_mapping_file'} . "\".\n " .
|
---|
[13486] | 226 | " No marc files can be processed.\n";
|
---|
| 227 |
|
---|
| 228 | print $outhandle $msg;
|
---|
| 229 | print $failhandle $msg;
|
---|
| 230 | $self->{'metadata_mapping'} = undef;
|
---|
| 231 | # We pick up the error in process() if there is no $mm_file
|
---|
| 232 | # If we exit here, then pluginfo.pl will exit too!
|
---|
| 233 | }
|
---|
[15018] | 234 | else {
|
---|
| 235 | $self->{'metadata_mapping'} = $self->parse_marc_metadata_mapping($mm_files);
|
---|
[13486] | 236 | }
|
---|
| 237 |
|
---|
| 238 |
|
---|
| 239 | ##map { print STDERR $_."=>".$metadata_mapping->{$_}."\n"; } keys %$metadata_mapping;
|
---|
| 240 |
|
---|
| 241 | $self->SUPER::init(@_);
|
---|
| 242 | }
|
---|
| 243 |
|
---|
| 244 | # Called for DOCTYPE declarations - use die to bail out if this doctype
|
---|
| 245 | # is not meant for this plugin
|
---|
| 246 | sub xml_doctype {
|
---|
| 247 | my $self = shift(@_);
|
---|
| 248 |
|
---|
| 249 | my ($expat, $name, $sysid, $pubid, $internal) = @_;
|
---|
| 250 | return;
|
---|
| 251 |
|
---|
| 252 | }
|
---|
| 253 |
|
---|
| 254 |
|
---|
| 255 | sub xml_start_document {
|
---|
| 256 | my $self = shift(@_);
|
---|
| 257 |
|
---|
| 258 | my ($expat, $name, $sysid, $pubid, $internal) = @_;
|
---|
| 259 |
|
---|
| 260 |
|
---|
| 261 | my $file = $self->{'file'};
|
---|
| 262 | my $filename = $self->{'filename'};
|
---|
[13496] | 263 |
|
---|
[13486] | 264 | my ($language, $encoding) = $self->textcat_get_language_encoding ($filename);
|
---|
| 265 |
|
---|
| 266 | $self->{'language'} = $language;
|
---|
| 267 | $self->{'encoding'} = $encoding;
|
---|
| 268 | $self->{'element_count'} = 1;
|
---|
[13496] | 269 | $self->{'indent'} = 0;
|
---|
[13486] | 270 | my $outhandle = $self->{'outhandle'};
|
---|
| 271 | print $outhandle "MARCXMLPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
|
---|
| 272 | print STDERR "<Processing n='$self->{'file'}' p='MARCXMLPlug'>\n" if $self->{'gli'};
|
---|
| 273 |
|
---|
| 274 | }
|
---|
| 275 |
|
---|
| 276 | sub xml_end_document {
|
---|
| 277 |
|
---|
| 278 | }
|
---|
| 279 |
|
---|
| 280 | sub xml_start_tag {
|
---|
| 281 | my $self = shift;
|
---|
| 282 | my $expat = shift;
|
---|
| 283 | my $element = shift;
|
---|
[13496] | 284 |
|
---|
[15018] | 285 | my $text = $_;
|
---|
| 286 | my $escaped_text = $self->escape_text($_);
|
---|
[13486] | 287 |
|
---|
| 288 | $self->{'current_element'} = $element;
|
---|
| 289 |
|
---|
| 290 | ##get all atributes of this element and store it in a map name=>value
|
---|
| 291 | my %attr_map = ();
|
---|
| 292 | my $attrstring = $_;
|
---|
| 293 | while ($attrstring =~ /(\w+)=\"(\w+)\"/){
|
---|
| 294 | $attr_map{$1}=$2;
|
---|
[13496] | 295 | $attrstring = $'; #'
|
---|
[13486] | 296 | }
|
---|
| 297 |
|
---|
[15018] | 298 |
|
---|
[13486] | 299 | my $processor = $self->{'processor'};
|
---|
| 300 |
|
---|
| 301 | ##create a new document for each record
|
---|
| 302 | if ($element eq "record") {
|
---|
[13496] | 303 | my $filename = $self->{'filename'};
|
---|
[13486] | 304 | my $language = $self->{'language'};
|
---|
| 305 | my $encoding = $self->{'encoding'};
|
---|
| 306 | my $file = $self->{'file'};
|
---|
| 307 | my $doc_obj = new doc($filename);
|
---|
| 308 | $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
|
---|
| 309 | $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Language", $language);
|
---|
| 310 | $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding);
|
---|
| 311 | my ($filemeta) = $file =~ /([^\\\/]+)$/;
|
---|
| 312 | $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
|
---|
| 313 | $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$self->{'record_count'}");
|
---|
| 314 | if ($self->{'cover_image'}) {
|
---|
| 315 | $self->associate_cover_image($doc_obj, $filename);
|
---|
| 316 | }
|
---|
| 317 | $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
|
---|
| 318 | $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "XML");
|
---|
| 319 |
|
---|
| 320 | my $outhandle = $self->{'outhandle'};
|
---|
| 321 | print $outhandle "Record $self->{'record_count'} - MARCXMLPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
|
---|
| 322 |
|
---|
| 323 | $self->{'record_count'}++;
|
---|
| 324 | $self->{'doc_obj'} = $doc_obj;
|
---|
| 325 | $self->{'num_processed'}++;
|
---|
| 326 |
|
---|
| 327 | }
|
---|
| 328 |
|
---|
| 329 | ## get the marc code, for example 520
|
---|
| 330 | if ($element eq "datafield") {
|
---|
[13496] | 331 | if (defined $attr_map{'tag'} and $attr_map{'tag'} ne ""){
|
---|
[13486] | 332 | $self->{'current_tag'} = $attr_map{tag};
|
---|
| 333 | }
|
---|
| 334 | }
|
---|
| 335 |
|
---|
| 336 |
|
---|
| 337 | ## append the subcode to the marc code for example 520a or 520b
|
---|
| 338 | if ($element eq "subfield"){
|
---|
[13496] | 339 | if (defined $attr_map{'code'} and $attr_map{'code'} ne "" and $self->{'current_tag'} ne ""){
|
---|
[13486] | 340 | $self->{'current_code'} = $attr_map{'code'};
|
---|
| 341 | }
|
---|
| 342 | }
|
---|
[13496] | 343 |
|
---|
| 344 | if ($element eq "record"){
|
---|
| 345 | $self->{'indent'} = 0;
|
---|
[15018] | 346 | $self->{'content'} = "";
|
---|
| 347 | $self->{'xmlcontent'} = "";
|
---|
[13496] | 348 | }
|
---|
| 349 | else {
|
---|
| 350 | if ($element ne "subfield"){
|
---|
| 351 | $self->{'indent'} = 1;
|
---|
| 352 | }
|
---|
| 353 | else{
|
---|
| 354 | $self->{'indent'} = 2;
|
---|
| 355 | }
|
---|
| 356 | }
|
---|
| 357 |
|
---|
[15018] | 358 |
|
---|
| 359 | if ($element eq "collection") {
|
---|
| 360 | # remember the full start tag for <collection ...>
|
---|
| 361 | # This is needed to wrap around each <record> when generating its associate MARCXML file
|
---|
| 362 |
|
---|
| 363 | $self->{'xmlcollectiontag'} = $text;
|
---|
| 364 | }
|
---|
| 365 | else {
|
---|
| 366 | $self->{'content'} .= "<br/>" if ($element ne "record");
|
---|
| 367 | $self->{'content'} .= $self->calculate_indent($self->{'indent'}).$escaped_text;
|
---|
| 368 | $self->{'xmlcontent'} .= $text;
|
---|
| 369 | }
|
---|
[13496] | 370 |
|
---|
[13486] | 371 | }
|
---|
| 372 |
|
---|
| 373 |
|
---|
| 374 |
|
---|
| 375 | sub xml_end_tag {
|
---|
| 376 | my $self = shift(@_);
|
---|
| 377 | my ($expat, $element) = @_;
|
---|
[15018] | 378 |
|
---|
| 379 | my $text = $_;
|
---|
| 380 | my $escaped_text = $self->escape_text($_);
|
---|
[13496] | 381 |
|
---|
[13486] | 382 | if ($element eq "record" and defined $self->{'doc_obj'}) {
|
---|
| 383 | # process the document
|
---|
| 384 | my $processor = $self->{'processor'};
|
---|
| 385 | my $doc_obj = $self->{'doc_obj'};
|
---|
[15018] | 386 | $self->{'content'} .= "<br/>".$escaped_text;
|
---|
| 387 | $self->{'xmlcontent'} .= $text;
|
---|
[13496] | 388 |
|
---|
[15018] | 389 |
|
---|
| 390 | my $top_section = $doc_obj->get_top_section();
|
---|
| 391 |
|
---|
| 392 | my $tmp_marcxml_filename = &util::get_tmp_filename().".xml";
|
---|
| 393 | if (open (XMLOUT,">$tmp_marcxml_filename")) {
|
---|
| 394 |
|
---|
| 395 | print XMLOUT "<?xml-stylesheet type=\"text/xsl\" href=\"MARC21slim2English.xsl\"?>\n";
|
---|
| 396 | my $xml_content = $self->{'xmlcontent'};
|
---|
| 397 |
|
---|
| 398 | $xml_content = $self->{'xmlcollectiontag'}.$xml_content."</collection>";
|
---|
| 399 |
|
---|
| 400 | print XMLOUT $xml_content;
|
---|
| 401 |
|
---|
| 402 | close(XMLOUT);
|
---|
| 403 |
|
---|
| 404 | $doc_obj->associate_file($tmp_marcxml_filename,"marcxml.xml","text/xml", $top_section);
|
---|
| 405 |
|
---|
| 406 | # assicate xsl style file for presentation as HTML
|
---|
| 407 | my $xsl_filename = &util::filename_cat($ENV{'GSDLHOME'},"etc","MARC21slim2English.xsl");
|
---|
| 408 | $doc_obj->associate_file($xsl_filename,"MARC21slim2English.xsl","text/xml", $top_section);
|
---|
| 409 |
|
---|
| 410 | }
|
---|
| 411 | else {
|
---|
| 412 | my $outhandle = $self->{'outhandle'};
|
---|
| 413 | print $outhandle "Warning: Unable for write out associated MARCXML file $tmp_marcxml_filename\n";
|
---|
| 414 | }
|
---|
| 415 |
|
---|
| 416 | $self->add_OID($doc_obj, $self->{'record_count'});
|
---|
| 417 |
|
---|
[13496] | 418 | $doc_obj->add_utf8_text($doc_obj->get_top_section(),$self->{'content'});
|
---|
[13486] | 419 | $processor->process($doc_obj);
|
---|
| 420 |
|
---|
| 421 | ##clean up
|
---|
| 422 | $self->{'content'} = "";
|
---|
[15018] | 423 | $self->{'xmlcontent'} = "";
|
---|
[13486] | 424 | $self->{'doc_obj'} = undef;
|
---|
[13496] | 425 | return;
|
---|
[13486] | 426 | }
|
---|
| 427 |
|
---|
| 428 | ## map the xmlmarc to gsdl metadata
|
---|
| 429 | if ($element eq "datafield" and defined $self->{'doc_obj'} and defined $self->{'marc_mapping'}){
|
---|
| 430 | my $metadata_mapping = $self->{'metadata_mapping'};
|
---|
| 431 | my $marc_mapping = $self->{'marc_mapping'};
|
---|
| 432 | my $doc_obj = $self->{'doc_obj'};
|
---|
| 433 |
|
---|
[15018] | 434 | ## print STDERR "**** Marc Record\n";
|
---|
| 435 | ## map { print STDERR $_."=>".$marc_mapping->{$_}."\n"; } keys %$marc_mapping;
|
---|
| 436 | ## print STDERR "**** Metadata Mapping\n";
|
---|
| 437 | ## map { print STDERR $_."=>".$metadata_mapping->{$_}."\n"; } keys %$metadata_mapping;
|
---|
[13486] | 438 |
|
---|
[15018] | 439 |
|
---|
[13486] | 440 | foreach my $marc_field (keys %$metadata_mapping){
|
---|
[15018] | 441 |
|
---|
| 442 | ## test whether this field has subfield
|
---|
[13486] | 443 | my $subfield = undef;
|
---|
[15018] | 444 | if ($marc_field =~ /(\d\d\d)(?:\$|\^)?(\w)/){
|
---|
| 445 | $marc_field = $1;
|
---|
| 446 | $subfield = $2;
|
---|
| 447 | }
|
---|
[13486] | 448 |
|
---|
[15018] | 449 | my $matched_field = $marc_mapping->{$marc_field};
|
---|
| 450 |
|
---|
| 451 | if (defined $matched_field) {
|
---|
| 452 |
|
---|
| 453 | my $meta_name = undef;
|
---|
| 454 | my $meta_value = undef;
|
---|
| 455 |
|
---|
[13486] | 456 | if (defined $subfield){
|
---|
[15018] | 457 | $meta_name = $metadata_mapping->{$marc_field."\$".$subfield};
|
---|
| 458 |
|
---|
| 459 | $meta_value = $matched_field->{$subfield};
|
---|
| 460 |
|
---|
| 461 | if (!defined $meta_value) {
|
---|
| 462 | # record read in does not have the specified subfield
|
---|
| 463 | next;
|
---|
| 464 | }
|
---|
[13486] | 465 | }
|
---|
[15018] | 466 | else {
|
---|
| 467 | $meta_name = $metadata_mapping->{$marc_field};
|
---|
| 468 |
|
---|
| 469 | # no subfield => get all the values
|
---|
| 470 | foreach my $value (sort keys %{$matched_field}) {
|
---|
| 471 | $meta_value .= $matched_field->{$value} ." ";
|
---|
[13486] | 472 | }
|
---|
[15018] | 473 |
|
---|
[13486] | 474 | }
|
---|
| 475 |
|
---|
| 476 | ## escape [ and ]
|
---|
| 477 | $meta_value =~ s/\[/\\\[/g;
|
---|
| 478 | $meta_value =~ s/\]/\\\]/g;
|
---|
| 479 | ##print STDERR "$meta_name=$meta_value\n";
|
---|
| 480 | $doc_obj->add_utf8_metadata($doc_obj->get_top_section(),$meta_name, $meta_value);
|
---|
| 481 |
|
---|
| 482 | }
|
---|
| 483 |
|
---|
| 484 | }
|
---|
| 485 |
|
---|
| 486 | ##clean up
|
---|
| 487 | $self->{'marc_mapping'} = undef;
|
---|
| 488 | $self->{'current_tag'} = "";
|
---|
| 489 | }
|
---|
[13496] | 490 |
|
---|
| 491 | if ($element eq "datafield"){
|
---|
| 492 | $self->{'indent'} = 1;
|
---|
[15018] | 493 | $self->{'content'} .= "<br/>".$self->calculate_indent($self->{'indent'}).$escaped_text;
|
---|
| 494 | $self->{'xmlcontent'} .= $text;
|
---|
[13496] | 495 | }
|
---|
| 496 | else{
|
---|
[15018] | 497 | $self->{'content'} .= $escaped_text;
|
---|
| 498 | $self->{'xmlcontent'} .= $text;
|
---|
[13496] | 499 | }
|
---|
| 500 |
|
---|
[13486] | 501 | }
|
---|
| 502 |
|
---|
| 503 |
|
---|
| 504 | sub set_OID {
|
---|
| 505 | my $self = shift (@_);
|
---|
[15018] | 506 | my ($doc_obj, $record_number) = @_;
|
---|
[13486] | 507 |
|
---|
[15018] | 508 | # first set it to generate hash value
|
---|
| 509 | $doc_obj->set_OID();
|
---|
| 510 |
|
---|
| 511 | # then top it up with an "r" + record-number suffix
|
---|
| 512 | my $id = $doc_obj->get_OID();
|
---|
[13486] | 513 | $doc_obj->set_OID($id . "r" . $record_number);
|
---|
| 514 | }
|
---|
| 515 |
|
---|
| 516 | sub xml_text {
|
---|
| 517 | my $self = shift(@_);
|
---|
| 518 | my ($expat) = @_;
|
---|
| 519 |
|
---|
[15018] | 520 | my $text = $_;
|
---|
| 521 | my $escaped_text = $self->escape_text($_);
|
---|
[13486] | 522 |
|
---|
[15018] | 523 | # protect against & in raw text file
|
---|
| 524 | $text =~ s/&/&/g; # can't have & in raw form, even in 'raw' xml text
|
---|
| 525 |
|
---|
[13486] | 526 | ## store the text of a marc code, for exapmle 520a=>A poem about....
|
---|
| 527 | if ($self->{'current_element'} eq "subfield" and $self->{'current_code'} ne "" and $_ ne "" ){
|
---|
| 528 | ##stored it in the marc_mapping
|
---|
[15018] | 529 |
|
---|
| 530 | my $current_tag = $self->{'current_tag'};
|
---|
| 531 | my $current_code = $self->{'current_code'};
|
---|
| 532 |
|
---|
| 533 | $self->{'marc_mapping'}->{$current_tag}->{$current_code} .= $_;
|
---|
| 534 |
|
---|
[13486] | 535 | $self->{'current_code'} = "";
|
---|
| 536 | }
|
---|
| 537 |
|
---|
[15018] | 538 | $self->{'content'} .= $escaped_text;
|
---|
| 539 | $self->{'xmlcontent'} .= $text;
|
---|
[13486] | 540 |
|
---|
| 541 | }
|
---|
| 542 |
|
---|
[13496] | 543 | sub calculate_indent{
|
---|
| 544 | my ($self,$num) = @_;
|
---|
[13486] | 545 |
|
---|
[13496] | 546 | my $indent ="";
|
---|
| 547 |
|
---|
| 548 | for (my $i=0; $i<$num;$i++){
|
---|
| 549 | $indent .= " ";
|
---|
| 550 | }
|
---|
| 551 |
|
---|
| 552 | return $indent;
|
---|
| 553 |
|
---|
| 554 | }
|
---|
| 555 |
|
---|
| 556 | sub escape_text {
|
---|
| 557 | my ($self,$text) = @_;
|
---|
| 558 | # special characters in the xml encoding
|
---|
| 559 | $text =~ s/&/&/g; # this has to be first...
|
---|
| 560 | $text =~ s/</</g;
|
---|
| 561 | $text =~ s/>/>/g;
|
---|
| 562 | $text =~ s/\"/"/g;
|
---|
| 563 |
|
---|
| 564 | return $text;
|
---|
| 565 | }
|
---|
| 566 |
|
---|
| 567 |
|
---|
[15018] | 568 | sub unescape_text {
|
---|
| 569 | my ($self,$text) = @_;
|
---|
| 570 | # special characters in the xml encoding
|
---|
| 571 | $text =~ s/</</g;
|
---|
| 572 | $text =~ s/>/>/g;
|
---|
| 573 | $text =~ s/"/\"/g;
|
---|
| 574 |
|
---|
| 575 | $text =~ s/&/&/g; # can't have & in raw form, even in unescaped xml!
|
---|
| 576 |
|
---|
| 577 | return $text;
|
---|
| 578 | }
|
---|
| 579 |
|
---|
| 580 |
|
---|
[13486] | 581 | 1;
|
---|
| 582 |
|
---|
| 583 |
|
---|