Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex that lists the metadata fields to apply join_before_split to. Previously, join_before_split was always applied to all metadata fields. Now it's applied by default to any metafield matching .*Keywords, as that's the metafield we have experience of that behaves differently to the others, since it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/Rawzor.pm

    r24107 r34921  
    1515use Image::ExifTool qw(:DataAccess :Utils);
    1616
    17 $VERSION = '1.01';
     17$VERSION = '1.05';
    1818
    1919# currently support this version Rawzor images
     
    5252sub ProcessRWZ($$)
    5353{
    54     my ($exifTool, $dirInfo) = @_;
     54    my ($et, $dirInfo) = @_;
    5555    my $raf = $$dirInfo{RAF};
    5656    my ($buff, $buf2);
     
    7272    my $origSize = Get64u(\$buff, 18);
    7373    my $tagTablePtr = GetTagTable('Image::ExifTool::Rawzor::Main');
    74     $exifTool->HandleTag($tagTablePtr, RawzorRequiredVersion => $reqVers);
    75     $exifTool->HandleTag($tagTablePtr, RawzorCreatorVersion => $creatorVers);
    76     $exifTool->HandleTag($tagTablePtr, OriginalFileSize => $origSize);
    77     $exifTool->HandleTag($tagTablePtr, CompressionFactor => $origSize/$rwzSize) if $rwzSize;
     74    $et->HandleTag($tagTablePtr, RawzorRequiredVersion => $reqVers);
     75    $et->HandleTag($tagTablePtr, RawzorCreatorVersion => $creatorVers);
     76    $et->HandleTag($tagTablePtr, OriginalFileSize => $origSize);
     77    $et->HandleTag($tagTablePtr, CompressionFactor => $origSize/$rwzSize) if $rwzSize;
    7878    # check version numbers
    7979    if ($reqVers > $implementedRawzorVersion) {
    80         $exifTool->Warn("Version $reqVers Rawzor images not yet supported");
     80        $et->Warn("Version $reqVers Rawzor images not yet supported");
    8181        return 1;
    8282    }
    8383    my $metaOffset = Get64u(\$buff, 38);
    8484    if ($metaOffset > 0x7fffffff) {
    85         $exifTool->Warn('Bad metadata offset');
     85        $et->Warn('Bad metadata offset');
    8686        return 1;
    8787    }
    8888    # check for the ability to uncompress the information
    89     unless (eval 'require IO::Uncompress::Bunzip2') {
    90         $exifTool->Warn('Install IO::Compress::Bzip2 to decode Rawzor bzip2 compression');
     89    unless (eval { require IO::Uncompress::Bunzip2 }) {
     90        $et->Warn('Install IO::Compress::Bzip2 to decode Rawzor bzip2 compression');
    9191        return 1;
    9292    }
     
    100100    # 40 int32u - compressed metadata size
    101101    unless ($raf->Seek($metaOffset, 0) and $raf->Read($buff, 44) == 44) {
    102         $exifTool->Warn('Error reading metadata header');
     102        $et->Warn('Error reading metadata header');
    103103        return 1;
    104104    }
    105105    my $metaSize = Get32u(\$buff, 36);
    106106    if ($metaSize) {
     107        $$et{DontValidateImageData} = 1;
    107108        # validate the metadata header and read the compressed metadata
    108109        my $end0 = Get64u(\$buff, 0);
     
    115116            $end0 <= $pos1 and $pos1 <= $end1 and $end1 <= $pos2)
    116117        {
    117             $exifTool->Warn('Error reading image metadata');
     118            $et->Warn('Error reading image metadata');
    118119            return 1;
    119120        }
     
    122123            length($buf2) eq $metaSize)
    123124        {
    124             $exifTool->Warn('Error uncompressing image metadata');
     125            $et->Warn('Error uncompressing image metadata');
    125126            return 1;
    126127        }
     
    133134
    134135        # extract original information by calling ExtractInfo recursively
    135         $exifTool->ExtractInfo(\$buff, { ReEntry => 1 });
     136        $et->ExtractInfo(\$buff, { ReEntry => 1 });
    136137        undef $buff;
    137138    }
    138139    # set OriginalFileType from FileType of original file
    139140    # then change FileType and MIMEType to indicate a Rawzor image
    140     my $origFileType = $exifTool->{VALUE}->{FileType};
     141    my $origFileType = $$et{VALUE}{FileType};
    141142    if ($origFileType) {
    142         $exifTool->HandleTag($tagTablePtr, OriginalFileType => $origFileType);
    143         $exifTool->{VALUE}->{FileType} = 'RWZ';
    144         $exifTool->{VALUE}->{MIMEType} = 'image/x-rawzor';
     143        $et->HandleTag($tagTablePtr, OriginalFileType => $origFileType);
     144        $et->OverrideFileType('RWZ');
    145145    } else {
    146         $exifTool->HandleTag($tagTablePtr, OriginalFileType => 'Unknown');
    147         $exifTool->SetFileType();
     146        $et->HandleTag($tagTablePtr, OriginalFileType => 'Unknown');
     147        $et->SetFileType();
    148148    }
    149149    return 1;
     
    169169=head1 AUTHOR
    170170
    171 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     171Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    172172
    173173This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.