Changeset 14940


Ignore:
Timestamp:
2008-01-11T14:09:55+13:00 (16 years ago)
Author:
dmn
Message:

updates for qualified DC

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/OAIPlug.pm

    r13886 r14940  
    326326
    327327
     328sub remap_dcterms_metadata
     329{
     330    my $self = shift(@_);
     331
     332    my ($metaname) = @_;
     333
     334    my $dcterm_mapping = {
     335    "alternative" => "dc.title",
     336    "tableOfContents" => "dc.description",
     337    "abstract" => "dc.description",
     338    "created" => "dc.date",
     339    "valid" => "dc.date",
     340    "available" => "dc.date",
     341    "issued" => "dc.date",
     342    "modified" => "dc.date",
     343    "dateAccepted" => "dc.date",
     344    "dateCopyrighted" => "dc.date",
     345    "dateSubmitted" => "dc.date",
     346    "extent" => "dc.format",
     347    "medium" => "dc.format",
     348    "isVersionOf" => "dc.relation",
     349    "hasVersion" => "dc.relation",
     350    "isReplacedBy" => "dc.relation",
     351    "replaces" => "dc.relation",
     352    "isRequiredBy" => "dc.relation",
     353    "requires" => "dc.relation",
     354    "isPartOf" => "dc.relation",
     355    "hasPart" => "dc.relation",
     356    "isReferencedBy" => "dc.relation",
     357    "references" => "dc.relation",
     358    "isFormatOf" => "dc.relation",
     359    "hasFormat" => "dc.relation",
     360    "conformsTo" => "dc.relation",
     361    "spatial" => "dc.coverage",
     362    "temporal" => "dc.coverage",
     363    "audience" => "dc.any",
     364    "accrualMethod" => "dc.any",
     365    "accrualPeriodicity" => "dc.any",
     366    "accrualPolicy" => "dc.any",
     367    "instructionalMethod" => "dc.any",
     368    "provenance" => "dc.any",
     369    "rightsHolder" => "dc.any",
     370    "mediator" => "audience",
     371    "educationLevel" => "audience",
     372    "accessRights" => "dc.rights",
     373    "license" => "dc.rights",
     374    "bibliographicCitation" => "dc.identifier"
     375    };
     376
     377    my ($prefix,$name) = ($metaname =~ m/^(.*?)\.(.*?)$/);
     378   
     379    if ($prefix eq "dcterms")
     380    {
     381    if (defined $dcterm_mapping->{$name})
     382    {
     383        return $dcterm_mapping->{$name}."^".$name;
     384    }
     385
     386    }
     387    return $metaname; # didn't get a match, return param passed in unchanged
     388}
    328389
    329390
     
    340401    {
    341402    my $metadata_text = $1;
    342     $metadata_text =~ s/^.*?<(oai_dc:)?dc.*?>(.*?)<\/(oai_dc:)?dc>.*?/$2/s;
    343 
    344     while ($metadata_text =~ m/<(.*?)>(.*?)<\/(.*?)>(.*)/s)
     403
     404    # locate and remove outermost tag (ignoring any attribute information in top-level tag)
     405    my ($wrapper_metadata_xml,$inner_metadata_text) = ($metadata_text =~ m/<([^ >]+).*?>(.*?)<\/\1>/s);
     406
     407    # split tag into namespace and tag name
     408    my($namespace,$top_level_prefix) = ($wrapper_metadata_xml =~ m/^(.*?):(.*?)$/);
     409
     410    if ($top_level_prefix !~ /dc$/) {
     411        print $outhandle "Warning: OAIPlug currently only designed for Dublin Core (or variant) metadata\n";
     412        print $outhandle "         This recorded metadata section '$top_level_prefix' does not appear to match.\n";
     413        print $outhandle "         Metadata assumed to be in form: <prefix:tag>value</prefix:tag> and will be converted\n";
     414        print $outhandle "         into Greenstone metadata as prefix.tag = value\n";
     415    }
     416
     417    while ($inner_metadata_text =~ m/<([^ >]+).*?>(.*?)<\/(.*?)>(.*)/s)
    345418    {
    346419        # if URL given for document as identifier metadata, store it ...
     
    349422        my $metaname = $1;
    350423        my $metavalue = $2;
    351         $metadata_text = $4;
    352        
    353         $metaname =~ s/^(dc:)?(.)/\u$2/;
    354 
    355         if ($metaname eq "Identifier")
     424        $inner_metadata_text = $4;
     425       
     426        # $metaname =~ s/^(dc:)?(.)/\u$2/; # strip off optional prefix and uppercase first letter
     427        $metaname =~ s/:/\./;
     428        if ($metaname !~ m/\./)
    356429        {
    357         # name clashes with GSDL reserved metadata name for hash id
    358         $metaname = "URL";
     430        $metaname = "$top_level_prefix.$metaname";
    359431        }
     432
     433
     434        $metaname = $self->remap_dcterms_metadata($metaname);
     435
     436#       if ($metaname eq "Identifier")
     437#       {
     438#       # name clashes with GSDL reserved metadata name for hash id
     439#       $metaname = "URL";
     440#       }
    360441
    361442        if (defined $metadata->{$metaname})
Note: See TracChangeset for help on using the changeset viewer.