Changeset 7287


Ignore:
Timestamp:
2004-05-06T17:28:54+12:00 (20 years ago)
Author:
mdewsnip
Message:

PDFPlug now extracts date metadata from the output of pdftohtml 0.36 and converts it to Greenstone's date format (YYYYMMDD, stored as "Date" metadata). Note: this change has not been tested yet.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/PDFPlug.pm

    r7107 r7287  
    221221    print $outhandle "PDFPlug: passing $_[3] on to $self->{'converted_to'}Plug\n"
    222222    if $self->{'verbosity'} > 1;
    223    
    224     return ConvertToPlug::process_type($self,"pdf",@_);
     223
     224    # Extracted the date field from pdftohtml's output
     225    $self->{'metadata_fields'} .= ",date";
     226
     227    my $result = ConvertToPlug::process_type($self,"pdf",@_);
     228
     229    # Convert all date metadata extracted by pdftohtml into Greenstone format
     230    my $doc_obj = pop(@_);
     231    foreach $datemeta (@{$doc_obj->get_metadata($cursection, "date")}) {
     232    $doc_obj->delete_metadata($cursection, "date", $datemeta);
     233
     234    # We're just interested in the date bit, not the time
     235    $datemeta =~ /(\d\d\d\d)-(\d\d)-(\d\d).*/;
     236    $doc_obj->add_utf8_metadata($cursection, "Date", "$1$2$3");
     237    }
     238
     239    return $result;
    225240}
    226241
Note: See TracChangeset for help on using the changeset viewer.