Changeset 13198 for trunk/gsdl/perllib/plugins
- Timestamp:
- 2006-10-30T16:18:23+13:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/MARCPlug.pm
# Changeset-view context preceding this sub (hunk of another sub, incomplete in this view), kept verbatim:
# r12401 r13198 80 80 my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists); 81 81 82 $self->{'type'} = ""; 83 82 84 return bless $self, $class; 83 85 } … … 150 152

# Returns the regular expression source (as a plain string) used to split
# the text of a file into individual records.  As of r13198 a record
# boundary is either a blank line or an ASCII MARC "Record type" header;
# r12401 returned only the blank-line alternative.
sub get_default_split_exp
{
    # blank line between records: \r\n for msdos eol, \n for unix
    my $blank_line = q{\r?\n\s*\r?\n};

    # header line that introduces each record of an ASCII MARC dump
    my $ascii_header = q{\[\w+\]Record type: USmarc};

    return $blank_line . '|' . $ascii_header;
}

# Changeset-view context following this sub (hunk of another sub, incomplete in this view), kept verbatim:
# 154 156 … … 181 183 $self->{'readfile_encoding'}->{$filename} = $encoding; 182 184 183 my @marc_entries = (); 184 185 185 186 if (!-r $filename) 186 187 { … … 190 191 } 191 192 193 ##handle ascii marc 194 #test whether this is ascii marc file 195 if (open (FILE, $filename)) { 196 while (defined (my $line = <FILE>)) { 197 $$textref .= $line; 198 if ($line =~ /\[\w+\]Record type:/){ 199 undef $/; 200 $$textref .= <FILE>; 201 $/ = "\n"; 202 $self->{'type'} = "ascii"; 203 close FILE; 204 return; 205 } 206 } 207 close FILE; 208 } 209 210 211 $$textref = ""; 212 my @marc_entries = (); 213 192 214 my $batch = new MARC::Batch( 'USMARC', $filename ); 193 215 while ( my $marc = $batch->next ) 194 216 { 195 push(@marc_entries,$marc);217 push(@marc_entries,$marc); 196 218 $$textref .= $marc->as_formatted(); 197 219 $$textref .= "\n\n"; # for SplitPlug - see default_split_exp above...
# Changeset-view context preceding this sub (hunks of other subs, incomplete in this view), kept verbatim:
# … … 231 253 232 254 my $encoding = $self->{'readfile_encoding'}->{$filename}; 233 $self->extract_metadata ($marc, $metadata, $encoding, $doc_obj, $cursection); 255 256 if ($self->{'type'} ne "ascii" ){ 257 $self->extract_metadata ($marc, $metadata, $encoding, $doc_obj, $cursection); 258 } 259 else{ 260 $self->extract_ascii_metadata ($$textref,$metadata,$doc_obj, $cursection); 261 } 234 262 235 263 # add spaces after the sub-field markers, for word boundaries … … 290 318 my $outhandle = $self->{'outhandle'}; 291 319 320 if (!defined $marc){ 321 return; 322 } 323 292 324 my $metadata_mapping = $self->{'metadata_mapping'}; 293 325 my $mm; 326 294 327 foreach $mm ( @$metadata_mapping ) 295 328 { 296 329 my $marc_field = $mm->{'marc'}; 330 297 331 my @metavalues = $marc->field($marc_field); 298 332 … … 309 343 } 310 344 } 345

# Split the text of one ASCII MARC record into its fields.
#
# Returns a hash ref mapping each three-digit tag to an array ref holding one
# entry per occurrence of that tag, in source order.  Each entry is a hash
# ref with:
#   'info'      - the text between the tag and the first '$'
#                 (presumably the indicators - TODO confirm)
#   'subfields' - array ref of [code, value] pairs, in source order
#
# Lines that do not start with "<three digits><whitespace>" (for example the
# "Record type" header) and '$' chunks that do not start with
# "<code><whitespace>" are skipped, so nothing is ever stored from a failed
# match.
sub _parse_ascii_marc_fields
{
    my ($text) = @_;

    my %occurrences_of;

    foreach my $line (split(/[\n\r]+/, $text)) {
        next unless $line =~ /^(\d\d\d)\s(.*)$/;
        my ($tag, $rest) = ($1, $2);

        # the first chunk precedes any '$' marker; every later chunk is
        # "<code> <value>"
        my ($info, @chunks) = split(/\$/, $rest);

        my @subfields;
        foreach my $chunk (@chunks) {
            next unless $chunk =~ /^(\w)\s(.*)$/;
            push(@subfields, [$1, $2]);
        }

        push(@{$occurrences_of{$tag}},
             { 'info' => $info, 'subfields' => \@subfields });
    }

    return \%occurrences_of;
}

# Extract metadata from the text of one ASCII MARC record and attach it to
# $doc_obj according to $self->{'metadata_mapping'}.
#
# Arguments:
#   $text     - text of a single record, one field per line
#   $metadata - not used here; kept so the call matches extract_metadata
#   $doc_obj  - object receiving add_metadata($section, $name, $value)
#   $section  - section of $doc_obj the metadata is added to
#
# Each mapping entry names a MARC field in 'marc' and a metadata name in
# 'gsdl'.  A 'marc' value of three digits selects the whole field: the
# values of all its subfields are concatenated, each followed by a space.
# Three digits followed by one word character (e.g. "245a") selects just
# that subfield.  One metadata value is added per occurrence of the field,
# so repeated tags are no longer reduced to their last occurrence.
# Occurrences yielding no value are skipped.  '[' and ']' in values are
# backslash-escaped before being stored.
sub extract_ascii_metadata
{
    my $self = shift (@_);
    my ($text, $metadata, $doc_obj, $section) = @_;

    return unless defined $text;

    my $metadata_mapping = $self->{'metadata_mapping'} || [];
    my $occurrences_of = _parse_ascii_marc_fields($text);

    foreach my $mm (@$metadata_mapping) {
        my $marc_field = $mm->{'marc'};
        next unless defined $marc_field;

        # separate the tag from the optional subfield code before looking
        # the field up: parsed fields are keyed by the three-digit tag only
        next unless $marc_field =~ /\A(\d\d\d)(\w)?\z/;
        my ($tag, $wanted_code) = ($1, $2);

        my $occurrences = $occurrences_of->{$tag};
        next unless defined $occurrences;

        my $metaname = $mm->{'gsdl'};

        foreach my $occurrence (@$occurrences) {
            my $subfields = $occurrence->{'subfields'};
            my $metavalue;

            if (defined $wanted_code) {
                # a subfield code repeated inside one occurrence keeps its
                # last value
                my %value_of = map { $_->[0] => $_->[1] } @$subfields;
                $metavalue = $value_of{$wanted_code};
            }
            elsif (@$subfields) {
                # all subfield values, but not the 'info' part
                $metavalue = join('', map { $_->[1] . " " } @$subfields);
            }

            next unless defined $metavalue;

            ## escape [ and ]
            $metavalue =~ s/([\[\]])/\\$1/g;

            $doc_obj->add_metadata ($section, $metaname, $metavalue);
        }
    }

    return;
}


1;
Note:
See TracChangeset
for help on using the changeset viewer.