Changeset 19213 for gsdl/trunk/perllib/plugins/OAIPlugin.pm
- Timestamp: 2009-04-23T10:03:26+12:00 (14 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
gsdl/trunk/perllib/plugins/OAIPlugin.pm
r18901 r19213 41 41 } 42 42 43 my $set_list = 44 [ { 'name' => "auto", 45 'desc' => "{OAIPlugin.metadata_set.auto}" }, 46 { 'name' => "dc", 47 'desc' => "{OAIPlugin.metadata_set.dc}" } 48 ]; 43 49 44 50 my $arguments = … … 48 54 'reqd' => "no", 49 55 'deft' => &get_default_process_exp() }, 56 { 'name' => "metadata_set", 57 'desc' => "{OAIPlugin.metadata_set}", 58 'type' => "enumstring", 59 'reqd' => "no", 60 'list' => $set_list, 61 'deft' => "dc" }, 50 62 { 'name' => "document_field", 51 63 'desc' => "{OAIPlugin.document_field}", … … 369 381 } 370 382 371 372 sub remap_dcterms_metadata 373 { 374 my $self = shift(@_); 375 376 my ($metaname) = @_; 377 378 my $dcterm_mapping = { 379 "alternative" => "dc.title", 380 "tableOfContents" => "dc.description", 381 "abstract" => "dc.description", 382 "created" => "dc.date", 383 "valid" => "dc.date", 384 "available" => "dc.date", 385 "issued" => "dc.date", 386 "modified" => "dc.date", 387 "dateAccepted" => "dc.date", 388 "dateCopyrighted" => "dc.date", 389 "dateSubmitted" => "dc.date", 390 "extent" => "dc.format", 391 "medium" => "dc.format", 392 "isVersionOf" => "dc.relation", 393 "hasVersion" => "dc.relation", 394 "isReplacedBy" => "dc.relation", 395 "replaces" => "dc.relation", 396 "isRequiredBy" => "dc.relation", 397 "requires" => "dc.relation", 398 "isPartOf" => "dc.relation", 399 "hasPart" => "dc.relation", 400 "isReferencedBy" => "dc.relation", 401 "references" => "dc.relation", 402 "isFormatOf" => "dc.relation", 403 "hasFormat" => "dc.relation", 404 "conformsTo" => "dc.relation", 405 "spatial" => "dc.coverage", 406 "temporal" => "dc.coverage", 407 # these are top level elements in our qualified dc metadata set 383 my $qualified_dc_mapping = { 384 "alternative" => "dc.title", 385 "tableOfContents" => "dc.description", 386 "abstract" => "dc.description", 387 "created" => "dc.date", 388 "valid" => "dc.date", 389 "available" => "dc.date", 390 "issued" => "dc.date", 391 "modified" => "dc.date", 392 "dateAccepted" => 
"dc.date", 393 "dateCopyrighted" => "dc.date", 394 "dateSubmitted" => "dc.date", 395 "extent" => "dc.format", 396 "medium" => "dc.format", 397 "isVersionOf" => "dc.relation", 398 "hasVersion" => "dc.relation", 399 "isReplacedBy" => "dc.relation", 400 "replaces" => "dc.relation", 401 "isRequiredBy" => "dc.relation", 402 "requires" => "dc.relation", 403 "isPartOf" => "dc.relation", 404 "hasPart" => "dc.relation", 405 "isReferencedBy" => "dc.relation", 406 "references" => "dc.relation", 407 "isFormatOf" => "dc.relation", 408 "hasFormat" => "dc.relation", 409 "conformsTo" => "dc.relation", 410 "spatial" => "dc.coverage", 411 "temporal" => "dc.coverage", 412 # these are now top level elements in our qualified dc metadata set 408 413 # "audience" => "dc.any", 409 414 # "accrualMethod" => "dc.any", … … 413 418 # "provenance" => "dc.any", 414 419 # "rightsHolder" => "dc.any", 415 "mediator" => "dc.audience", 416 "educationLevel" => "dc.audience", 417 "accessRights" => "dc.rights", 418 "license" => "dc.rights", 419 "bibliographicCitation" => "dc.identifier" 420 }; 420 "mediator" => "dc.audience", 421 "educationLevel" => "dc.audience", 422 "accessRights" => "dc.rights", 423 "license" => "dc.rights", 424 "bibliographicCitation" => "dc.identifier" 425 }; 426 427 sub remap_dc_metadata 428 { 429 my $self = shift(@_); 430 431 my ($metaname) = @_; 421 432 422 433 my ($prefix,$name) = ($metaname =~ m/^(.*?)\.(.*?)$/); 423 434 424 if ($prefix eq "dcterms" || $prefix eq "dc") 425 { 426 if (defined $dcterm_mapping->{$name}) 427 { 428 return $dcterm_mapping->{$name}."^".$name; 429 } 430 431 } 435 if (defined $qualified_dc_mapping->{$name}) { 436 437 return $qualified_dc_mapping->{$name}."^".$name; 438 } 439 440 432 441 return $metaname; # didn't get a match, return param passed in unchanged 433 442 } … … 439 448 my $outhandle = $self->{'outhandle'}; 440 449 441 # Only handles DC metadata442 443 450 $self->open_prettyprint_metadata_table(); 444 451 … … 448 455 449 456 # locate and 
remove outermost tag (ignoring any attribute information in top-level tag) 450 my ($ wrapper_metadata_xml,$inner_metadata_text) = ($metadata_text =~ m/<([^ >]+).*?>(.*?)<\/\1>/s);457 my ($outer_tagname,$inner_metadata_text) = ($metadata_text =~ m/<([^ >]+).*?>(.*?)<\/\1>/s); 451 458 # split tag into namespace and tag name 452 my($namespace,$top_level_prefix) = ($ wrapper_metadata_xml=~ m/^(.*?):(.*?)$/);459 my($namespace,$top_level_prefix) = ($outer_tagname =~ m/^(.*?):(.*?)$/); 453 460 # sometimes, the dc namespace is not specified as the prefix in each element (like <dc:title>) 454 461 # but is rather defined in the wrapper element containing the various dc meta elements, 455 462 # like <dc><title></title><creator></creator></dc>. 456 463 # In such a case, we use this wrapper element as the top_level_prefix 457 if(!defined $top_level_prefix && defined $wrapper_metadata_xml && $wrapper_metadata_xml =~ m/dc$/) { 458 $top_level_prefix = $wrapper_metadata_xml; 464 465 # if there was no prefix, then the tag itself becomes the top_level_prefix 466 if(!defined $top_level_prefix && defined $outer_tagname) { 467 $top_level_prefix = $outer_tagname; 459 468 } 460 469 461 if ($top_level_prefix !~ m/dc$/) { 462 print $outhandle "Warning: OAIPlugin currently only designed for Dublin Core (or variant) metadata\n"; 463 print $outhandle " This recorded metadata section '$top_level_prefix' does not appear to match.\n"; 464 print $outhandle " Metadata assumed to be in form: <prefix:tag>value</prefix:tag> and will be converted\n"; 465 print $outhandle " into Greenstone metadata as prefix.tag = value\n"; 466 } 467 470 #process each element one by one 468 471 while ($inner_metadata_text =~ m/<([^ >]+).*?>(.*?)<\/\1>(.*)/s) 469 472 { … … 473 476 $inner_metadata_text = $3; 474 477 475 # $metaname =~ s/^(dc:)?(.)/\u$2/; # strip off optional prefix and uppercase first letter478 # greenstone uses . 
for namespace, while oai uses : 476 479 $metaname =~ s/:/\./; 480 # if there is no namespace, then we use the outer tag name or 481 # namespace for this element 477 482 if ($metaname !~ m/\./) 478 483 { 479 484 $metaname = "$top_level_prefix.$metaname"; 480 485 } 481 482 $metaname = $self->remap_dcterms_metadata($metaname); 486 487 # if metadata set is auto, leave as is, otherwise convert to 488 # specified namespace 489 if ($self->{'metadata_set'} ne "auto") { 490 $metaname =~ s/^([^\.]*)\./$self->{'metadata_set'}\./; 491 if ($self->{'metadata_set'} eq "dc") { 492 # convert qualified dc terms to gs version, e.g. 493 # spatial becomes coverage^spatial 494 $metaname = $self->remap_dc_metadata($metaname); 495 } 496 } 483 497 484 498 # uppercase the first char of the name
Note: See TracChangeset for help on using the changeset viewer.