Changeset 21800 for main/trunk


Ignore:
Timestamp:
2010-03-17T12:33:06+13:00 (14 years ago)
Author:
kjdon
Message:

added a new option, metadata_field_separator, which specifies what to split on for multi-valued metadata

Location:
main/trunk/greenstone2/perllib
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r20791 r21800  
    7575    'type' => "string",
    7676    'deft' => "Title" },
     77      { 'name' => "metadata_field_separator",
     78    'desc' => "{HTMLPlugin.metadata_field_separator}",
     79    'type' => "string",
     80    'deft' => "" },
    7781      { 'name' => "hunt_creator_metadata",
    7882    'desc' => "{HTMLPlugin.hunt_creator_metadata}",
     
    923927    return if (!defined $self->{'metadata_fields'});
    924928
     929    my $separator = $self->{'metadata_field_separator'};
     930    if ($separator eq "") {
     931    undef $separator;
     932    }
     933
    925934    # metadata fields to extract/save. 'key' is the (lowercase) name of the
    926935    # html meta, 'value' is the metadata name for greenstone to use
     
    10361045    #    $tag = lc($tag);
    10371046    #}
    1038 
    1039     $doc_obj->add_utf8_metadata($section, $tag, $value);
    1040 
     1047    if (defined $separator) {
     1048        my @values = split($separator, $value);
     1049        foreach my $v (@values) {
     1050        $doc_obj->add_utf8_metadata($section, $tag, $v) if $v =~ /\S/;
     1051        }
     1052    }
     1053    else {
     1054        $doc_obj->add_utf8_metadata($section, $tag, $value);
     1055    }
    10411056    }
    10421057   
  • main/trunk/greenstone2/perllib/plugins/PDFPlugin.pm

    r20790 r21800  
    7272       'type' => "string",
    7373       'deft' => "" },
     74      { 'name' => "metadata_field_separator",
     75    'desc' => "{HTMLPlugin.metadata_field_separator}",
     76    'type' => "string",
     77    'deft' => "" },
    7478     { 'name' => "noimages",
    7579       'desc' => "{PDFPlugin.noimages}",
     
    174178    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
    175179    }
    176     #push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     180    if (defined $self->{'metadata_field_separator'} && $self->{'metadata_field_separator'} =~ /\S/) {
     181    push(@$html_options,"-metadata_field_separator",$self->{'metadata_field_separator'});
     182    }
    177183       
    178184    if ($self->{'use_sections'} || $self->{'description_tags'}) {
  • main/trunk/greenstone2/perllib/strings.properties

    r21765 r21800  
    923923HTMLPlugin.metadata_fields:Comma separated list of metadata fields to attempt to extract. Capitalise this as you want the metadata capitalised in Greenstone, since the tag extraction is case insensitive. e.g. Title,Date. Use 'tag<tagname>' to have the contents of the first <tag> pair put in a metadata element called 'tagname'. e.g. Title,Date,Author<Creator>
    924924
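     925HTMLPlugin.metadata_field_separator:Separator used in multi-valued metadata. Each metadata field value is split on this separator (it is interpreted as a Perl regular expression, so characters such as | or . must be escaped), and each non-blank item is added as an individual metadata value. Leave empty to disable splitting.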
     926
    925927HTMLPlugin.no_metadata:Don't attempt to extract any metadata from files.
    926928
Note: See TracChangeset for help on using the changeset viewer.