Changeset 21800

Show
Ignore:
Timestamp:
17.03.2010 12:33:06 (10 years ago)
Author:
kjdon
Message:

Added a new option, metadata_field_separator, which specifies the separator on which multi-valued metadata is split.

Location:
main/trunk/greenstone2/perllib
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r20791 r21800  
    7575    'type' => "string", 
    7676    'deft' => "Title" }, 
     77      { 'name' => "metadata_field_separator", 
     78    'desc' => "{HTMLPlugin.metadata_field_separator}", 
     79    'type' => "string", 
     80    'deft' => "" }, 
    7781      { 'name' => "hunt_creator_metadata", 
    7882    'desc' => "{HTMLPlugin.hunt_creator_metadata}", 
     
    923927    return if (!defined $self->{'metadata_fields'}); 
    924928 
     929    my $separator = $self->{'metadata_field_separator'}; 
     930    if ($separator eq "") { 
     931    undef $separator; 
     932    } 
     933 
    925934    # metadata fields to extract/save. 'key' is the (lowercase) name of the 
    926935    # html meta, 'value' is the metadata name for greenstone to use 
     
    10361045    #    $tag = lc($tag); 
    10371046    #} 
    1038  
    1039     $doc_obj->add_utf8_metadata($section, $tag, $value); 
    1040  
     1047    if (defined $separator) { 
     1048        my @values = split($separator, $value); 
     1049        foreach my $v (@values) { 
     1050        $doc_obj->add_utf8_metadata($section, $tag, $v) if $v =~ /\S/; 
     1051        } 
     1052    } 
     1053    else { 
     1054        $doc_obj->add_utf8_metadata($section, $tag, $value); 
     1055    } 
    10411056    } 
    10421057     
  • main/trunk/greenstone2/perllib/plugins/PDFPlugin.pm

    r20790 r21800  
    7272       'type' => "string", 
    7373       'deft' => "" }, 
     74      { 'name' => "metadata_field_separator", 
     75    'desc' => "{HTMLPlugin.metadata_field_separator}", 
     76    'type' => "string", 
     77    'deft' => "" }, 
    7478     { 'name' => "noimages", 
    7579       'desc' => "{PDFPlugin.noimages}", 
     
    174178    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); 
    175179    } 
    176     #push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); 
     180    if (defined $self->{'metadata_field_separator'} && $self->{'metadata_field_separator'} =~ /\S/) { 
     181    push(@$html_options,"-metadata_field_separator",$self->{'metadata_field_separator'}); 
     182    } 
    177183         
    178184    if ($self->{'use_sections'} || $self->{'description_tags'}) { 
  • main/trunk/greenstone2/perllib/strings.properties

    r21765 r21800  
    923923HTMLPlugin.metadata_fields:Comma separated list of metadata fields to attempt to extract. Capitalise this as you want the metadata capitalised in Greenstone, since the tag extraction is case insensitive. e.g. Title,Date. Use 'tag<tagname>' to have the contents of the first <tag> pair put in a metadata element called 'tagname'. e.g. Title,Date,Author<Creator> 
    924924 
     925HTMLPlugin.metadata_field_separator:Separator character used in multi-valued metadata. Each metadata field value is split on this character, and each resulting item is added as an individual metadata value. 
     926 
    925927HTMLPlugin.no_metadata:Don't attempt to extract any metadata from files. 
    926928