Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added, called apply_join_before_split_to_metafields, which is a regex listing the metadata fields to apply join_before_split to; previously join_before_split was always applied to all metadata fields. Now it's applied by default to any metafields matching .*Keywords, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/PSP.pm

    r24107 r34921  
    1616use Image::ExifTool::Exif;
    1717
    18 $VERSION = '1.03';
     18$VERSION = '1.05';
    1919
    2020sub ProcessExtData($$$);
     
    6666    #18 => {
    6767    #    Name => 'PreviewImage',
     68    #    Groups => { 2 => 'Preview' },
    6869    #    RawConv => '$self->ValidateImage(\$val,$tag)',
    6970    #},
     
    163164sub ProcessExtData($$$)
    164165{
    165     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
     166    my ($et, $dirInfo, $tagTablePtr) = @_;
    166167    my $dataPt = $$dirInfo{DataPt};
    167168    my $dirLen = $$dirInfo{DirLen};
     
    170171    while ($pos + 10 < $dirLen) {
    171172        unless (substr($$dataPt, $pos, 4) eq "~FL\0") {
    172             $exifTool->Warn('Lost synchronization while reading sub blocks');
     173            $et->Warn('Lost synchronization while reading sub blocks');
    173174            last;
    174175        }
     
    177178        $pos += 10 + $len;
    178179        if ($pos > $dirLen) {
    179             $exifTool->Warn("Truncated sub block ID=$tag len=$len");
     180            $et->Warn("Truncated sub block ID=$tag len=$len");
    180181            last;
    181182        }
    182183        next unless $$tagTablePtr{$tag};
    183         my $tagInfo = $exifTool->GetTagInfo($tagTablePtr, $tag) or next;
     184        my $tagInfo = $et->GetTagInfo($tagTablePtr, $tag) or next;
    184185        my $start = $pos - $len;
    185186        unless ($$tagInfo{Name} eq 'EXIFInfo') {
    186             $exifTool->HandleTag($tagTablePtr, $tag, undef,
     187            $et->HandleTag($tagTablePtr, $tag, undef,
    187188                TagInfo => $tagInfo,
    188189                DataPt  => $dataPt,
     
    213214        );
    214215        my $exifTable = GetTagTable($$tagInfo{SubDirectory}{TagTable});
    215         Image::ExifTool::Exif::ProcessExif($exifTool, \%dirInfo, $exifTable);
     216        Image::ExifTool::Exif::ProcessExif($et, \%dirInfo, $exifTable);
    216217        SetByteOrder('II');
    217218    }
     
    225226sub ProcessPSP($$)
    226227{
    227     my ($exifTool, $dirInfo) = @_;
     228    my ($et, $dirInfo) = @_;
    228229    my $raf = $$dirInfo{RAF};
    229230    my ($buff, $tag, $len, $err);
     
    231232                    $buff eq "Paint Shop Pro Image File\x0a\x1a\0\0\0\0\0" and
    232233                    $raf->Read($buff, 4) == 4;
    233     $exifTool->SetFileType();
     234    $et->SetFileType();
    234235    SetByteOrder('II');
    235236    my $tagTablePtr = GetTagTable('Image::ExifTool::PSP::Main');
     
    237238    # figure out block header length for this format PSP file
    238239    my $hlen = $a[0] > 3 ? 10 : 14;
    239     $$exifTool{PSPFileVersion} = $a[0]; # save for use in Condition
    240     $exifTool->HandleTag($tagTablePtr, FileVersion => "@a");
     240    $$et{PSPFileVersion} = $a[0]; # save for use in Condition
     241    $et->HandleTag($tagTablePtr, FileVersion => "@a");
    241242    # loop through blocks in file
    242243    my $pos = 36;
     
    244245        last unless $raf->Read($buff, $hlen) == $hlen;
    245246        unless ($buff =~ /^~BK\0/) {
    246             $exifTool->Warn('Lost synchronization while reading main PSP blocks');
     247            $et->Warn('Lost synchronization while reading main PSP blocks');
    247248            last;
    248249        }
     
    255256        }
    256257        $raf->Read($buff, $len) == $len or $err=1, last;
    257         $exifTool->HandleTag($tagTablePtr, $tag, $buff,
     258        $et->HandleTag($tagTablePtr, $tag, $buff,
    258259            DataPt  => \$buff,
    259260            DataPos => $pos - $len,
     
    261262        );
    262263    }
    263     $err and $exifTool->Warn("Truncated main block ID=$tag len=$len");
     264    $err and $et->Warn("Truncated main block ID=$tag len=$len");
    264265    return 1;
    265266}
     
    284285=head1 AUTHOR
    285286
    286 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     287Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    287288
    288289This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.