Changeset 16692
- Timestamp:
- 2008-08-11T10:44:04+12:00 (16 years ago)
- Location:
- gsdl/trunk/perllib
- Files:
-
- 1 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/MARCPlugin.pm
r16104 r16692 31 31 use unicode; 32 32 use util; 33 use marcmapping; 33 34 34 35 use strict; … … 42 43 my $arguments = 43 44 [ { 'name' => "metadata_mapping", 44 'desc' => "{ MARCPlugin.metadata_mapping}",45 'desc' => "{common.deprecated} {MARCPlugin.metadata_mapping}", 45 46 'type' => "string", 46 'deft' => " marctodc.txt",47 'deft' => "", 47 48 'hiddengli' => "yes", # deprecated in favour of 'metadata_mapping_file' 48 49 'reqd' => "no" }, … … 50 51 'desc' => "{MARCXMLPlugin.metadata_mapping_file}", 51 52 'type' => "string", 52 'deft' => " ",53 'deft' => "marctodc.txt", 53 54 'reqd' => "no" }, 54 55 { 'name' => "process_exp", … … 99 100 # preference 100 101 101 if ($self->{'metadata_mapping _file'} eq"") {102 # If nothing set in the new version, use the old version103 # that defaults to 'marctodc.txt'102 if ($self->{'metadata_mapping'} ne "") { 103 print STDERR "MARCPlugin WARNING:: the metadata_mapping option is set but has been deprecated. Please use metadata_mapping_file option instead\n"; 104 # If the old version is set, use it. 104 105 $self->{'metadata_mapping_file'} = $self->{'metadata_mapping'}; 105 106 } 106 107 $self->{'metadata_mapping'} = undef; 107 108 $self->{'type'} = ""; 108 109 … … 114 115 my ($verbosity, $outhandle, $failhandle) = @_; 115 116 116 my @metadata_mapping = (); 117 118 # read in the metadata mapping file 119 120 my $mm_file = &util::locate_config_file($self->{'metadata_mapping_file'}); 121 122 if (!defined $mm_file) 123 { 124 117 ## the mapping file has already been loaded 118 if (defined $self->{'metadata_mapping'} ){ 119 $self->SUPER::init(@_); 120 return; 121 } 122 123 # read in the metadata mapping files 124 my $mm_files = &util::locate_config_files($self->{'metadata_mapping_file'}); 125 126 if (scalar(@$mm_files)==0) 127 { 125 128 my $msg = "MARCPlugin ERROR: Can't locate mapping file \"" . 126 $self->{'metadata_mapping_file'} . "\".\n " .129 $self->{'metadata_mapping_file'} . "\".\n " . 127 130 " No marc files can be processed.\n"; 128 131 … … 133 136 # If we exit here, then pluginfo.pl will exit too! 134 137 } 135 elsif (open(MMIN, "<$mm_file")) 136 { 137 my $l=1; 138 my $line; 139 while (defined($line=<MMIN>)) 140 { 141 chomp $line; 142 if ($line =~ m/^(\d+)\s*->\s*([\w\^]+)$/) 143 { 144 my $marc_info = $1; 145 my $gsdl_info = $2; 146 my $mapping = { 'marc' => $marc_info, 'gsdl' => $gsdl_info }; 147 push(@metadata_mapping,$mapping); 148 } 149 elsif ($line !~ m/^\#/ # allow comments (# in first column) 150 && $line !~ m/^\s*$/) # allow blank lines 151 { 152 print $outhandle "Parse error on line $l of $mm_file:\n"; 153 print $outhandle " \"$line\"\n"; 154 } 155 $l++ 156 } 157 close(MMIN); 158 159 $self->{'metadata_mapping'} = \@metadata_mapping; 160 } 161 else 162 { 163 print STDERR "Unable to open $mm_file: $!\n"; 164 } 165 166 138 else { 139 $self->{'metadata_mapping'} = &marcmapping::parse_marc_metadata_mapping($mm_files, $outhandle); 140 } 141 142 ##map { print STDERR $_."=>".$self->{'metadata_mapping'}->{$_}."\n"; } keys %{$self->{'metadata_mapping'}}; 167 143 168 144 $self->SUPER::init(@_); 169 145 } 170 146 147 171 148 172 149 sub get_default_process_exp { … … 340 317 341 318 342 sub extract_metadata 319 sub extract_metadata 343 320 { 344 321 my $self = shift (@_); 322 345 323 my ($marc, $metadata, $encoding, $doc_obj, $section) = @_; 346 324 my $outhandle = $self->{'outhandle'}; … … 350 328 } 351 329 352 my $metadata_mapping = $self->{'metadata_mapping'}; 353 my $mm; 354 355 foreach $mm ( @$metadata_mapping ) 356 { 357 my $marc_field = $mm->{'marc'}; 358 359 my @metavalues = $marc->field($marc_field); 360 361 if (scalar(@metavalues)>0) 362 { 363 my $metaname = $mm->{'gsdl'}; 364 my $metavalue; 365 foreach $metavalue ( @metavalues ) 330 my $metadata_mapping = $self->{'metadata_mapping'};; 331 332 foreach my $marc_field ( keys %$metadata_mapping ) 333 { 334 my $gsdl_field = $metadata_mapping->{$marc_field}; 335 my $meta_value = undef; 336 337 # have we got a subfield? 338 my $subfield = undef; 339 if ($marc_field =~ /(\d\d\d)(?:\$|\^)?(\w)/){ 340 $marc_field = $1; 341 $subfield = $2; 342 } 343 if (defined $subfield) { 344 $meta_value = $marc->subfield($marc_field, $subfield); 345 if (defined $meta_value) { 346 ## escape [ and ] 347 $meta_value =~ s/\[/\\\[/g; 348 $meta_value =~ s/\]/\\\]/g; 349 my $metavalue_str = $self->to_utf8($encoding, $meta_value); 350 $doc_obj->add_utf8_metadata ($section, $gsdl_field, $metavalue_str); 351 } 352 } else { 353 354 my @metavalues = $marc->field($marc_field); 355 356 if (scalar(@metavalues)>0) 366 357 { 367 my $metavalue_str = $self->to_utf8($encoding,$metavalue->as_string()); 368 $doc_obj->add_utf8_metadata ($section, $metaname, $metavalue_str); 369 } 370 } 371 } 372 } 358 my $metavalue = undef; 359 foreach $metavalue ( @metavalues ) 360 { 361 my $metavalue_str = $self->to_utf8($encoding,$metavalue->as_string()); 362 $doc_obj->add_utf8_metadata ($section, $gsdl_field, $metavalue_str); 363 } 364 } 365 } 366 } 367 } 368 373 369 374 370 sub extract_ascii_metadata 375 371 { 376 372 my $self = shift (@_); 373 377 374 my ($text, $metadata,$doc_obj, $section) = @_; 378 375 my $outhandle = $self->{'outhandle'}; … … 386 383 $field =~ /^(\d\d\d)\s/; 387 384 my $code = $1; 388 $field = $'; 385 $field = $'; #' 389 386 ##get subfields 390 387 my @subfields = split(/\$/,$field); … … 403 400 ##print STDERR "$1=>$'\n"; 404 401 push(@{$marc_mapping->{$code}},$1); 405 push(@{$marc_mapping->{$code}},$'); 402 push(@{$marc_mapping->{$code}},$'); #' 406 403 } 407 404 } … … 410 407 411 408 412 foreach my $mm ( @$metadata_mapping ) 413 { 414 my $marc_field = $mm->{'marc'}; 409 foreach my $marc_field ( keys %$metadata_mapping ) 410 { 415 411 416 412 my $matched_field = $marc_mapping->{$marc_field}; … … 422 418 $subfield = $1; 423 419 } 424 my $metaname = $m m->{'gsdl'};420 my $metaname = $metadata_mapping->{$marc_field}; 425 421 426 422 my $metavalue; -
gsdl/trunk/perllib/plugins/MARCXMLPlugin.pm
r16521 r16692 31 31 32 32 use ReadXMLFile; 33 use marcmapping; 33 34 34 35 use strict; … … 85 86 86 87 87 sub _parse_marc_metadata_mapping88 {89 my $self = shift(@_);90 my ($mm_file,$metadata_mapping) = @_;91 92 my $outhandle = $self->{'outhandle'};93 94 if (open(MMIN, "<$mm_file"))95 {96 my $l=0;97 my $line;98 while (defined($line=<MMIN>))99 {100 $l++;101 chomp $line;102 $line =~ s/#.*$//; # strip out any comments, including end of line103 next if ($line =~ m/^\s*$/);104 $line =~ s/\s+$//; # remove any white space at end of line105 106 my $parse_error_count = 0;107 if ($line =~ m/^-(\d+)\s*$/) {108 # special "remove" rule syntax109 my $marc_info = $1;110 if (defined $metadata_mapping->{$marc_info}) {111 delete $metadata_mapping->{$marc_info};112 }113 else {114 print $outhandle "Parse Warning: Did not file pre-existing rule $marc_info to remove";115 print $outhandle " on line $l of $mm_file:\n";116 print $outhandle " $line\n";117 }118 }119 elsif ($line =~ m/^(.*?)->\s*([\w\^]+)$/)120 {121 my $lhs = $1;122 my $gsdl_info = $2;123 124 my @fields = split(/,\s*/,$lhs);125 my $f;126 while ($f = shift (@fields)) {127 $f =~ s/\s+$//; # remove any white space at end of line128 129 if ($f =~ m/^(\d+)\-(\d+)$/) {130 # number range => genrate number in range and131 # push on to array132 push(@fields,$1..$2);133 next;134 }135 136 if ($f =~ m/^(\d+)((?:(?:\$|\^)\w)*)\s*$/) {137 138 my $marc_info = $1;139 my $opt_sub_fields = $2;140 141 if ($opt_sub_fields ne "") {142 my @sub_fields = split(/\$|\^/,$opt_sub_fields);143 shift @sub_fields; # skip first entry, which is blank144 145 foreach my $sub_field (@sub_fields) {146 $metadata_mapping->{$marc_info."\$".$sub_field} = $gsdl_info;147 }148 }149 else {150 # no subfields to worry about151 $marc_info =~ s/\^/\$/;152 $metadata_mapping->{$marc_info} = $gsdl_info;153 }154 }155 else {156 $parse_error_count++;157 }158 }159 }160 else161 {162 $parse_error_count++;163 }164 165 if ($parse_error_count>0) {166 167 print $outhandle "Parse Error: $parse_error_count syntax error(s) on line $l of $mm_file:\n";168 print $outhandle " $line\n";169 }170 }171 close(MMIN);172 }173 else174 {175 print STDERR "Unable to open $mm_file: $!\n";176 }177 }178 179 180 sub parse_marc_metadata_mapping181 {182 my $self = shift(@_);183 my ($mm_file_or_files) = @_;184 185 my $metadata_mapping = {};186 187 if (ref ($mm_file_or_files) eq 'SCALAR') {188 my $mm_file = $mm_file_or_files;189 $self->_parse_marc_metadata_mapping($mm_file,$metadata_mapping);190 }191 else {192 my $mm_files = $mm_file_or_files;193 194 # Need to process files in reverse order. This is so in the195 # case where we have both a "collect" and "main" version,196 # the "collect" one tops up the main one197 198 my $mm_file;199 while ($mm_file = pop(@$mm_files)) {200 $self->_parse_marc_metadata_mapping($mm_file,$metadata_mapping);201 }202 }203 204 return $metadata_mapping;205 }206 207 208 88 sub init { 209 89 my $self = shift (@_); … … 216 96 } 217 97 218 # read in the metadata mapping file 98 # read in the metadata mapping files 219 99 my $mm_files = &util::locate_config_files($self->{'metadata_mapping_file'}); 220 100 … … 233 113 } 234 114 else { 235 $self->{'metadata_mapping'} = $self->parse_marc_metadata_mapping($mm_files);115 $self->{'metadata_mapping'} = &marcmapping::parse_marc_metadata_mapping($mm_files, $outhandle); 236 116 } 237 117 … … 316 196 } 317 197 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 318 $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", " XML");198 $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "MARCXML"); 319 199 320 200 my $outhandle = $self->{'outhandle'};
Note:
See TracChangeset
for help on using the changeset viewer.