Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to, and which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/ITC.pm

    r24107 r34921  
    1616use Image::ExifTool qw(:DataAccess :Utils);
    1717
    18 $VERSION = '1.00';
     18$VERSION = '1.02';
    1919
    2020sub ProcessITC($$);
     
    6969        Name => 'ImageType',
    7070        Format => 'undef[4]',
    71         PrintConv => {
     71        ValueConv => { # (not PrintConv because the unconverted JPEG value is nasty)
    7272            'PNGf' => 'PNG',
    7373            "\0\0\0\x0d" => 'JPEG',
     
    8484sub ProcessITC($$)
    8585{
    86     my ($exifTool, $dirInfo) = @_;
     86    my ($et, $dirInfo) = @_;
    8787    my $raf = $$dirInfo{RAF};
    8888    my $rtnVal = 0;
     
    106106            last unless $tag eq 'itch';
    107107            last unless $size >= 0x1c and $size < 0x10000;
    108             $exifTool->SetFileType();
     108            $et->SetFileType();
    109109            SetByteOrder('MM');
    110110            $rtnVal = 1;    # this is an ITC file
     
    122122            );
    123123            my $tagTablePtr = GetTagTable('Image::ExifTool::ITC::Header');
    124             $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);
     124            $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
    125125        } elsif ($tag eq 'item') {
    126126            # don't want to read the entire item data (includes image)
     
    142142            $raf->Read($buff, $len) == $len or last;
    143143            unless ($len >= 0xb4 and substr($buff, 0xb0, 4) eq 'data') {
    144                 $exifTool->Warn('Parsing error. Please submit this ITC file for testing');
     144                $et->Warn('Parsing error. Please submit this ITC file for testing');
    145145                last;
    146146            }
     
    151151            );
    152152            $tagTablePtr = GetTagTable('Image::ExifTool::ITC::Item');
    153             $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);
     153            $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
    154154            # extract embedded image
    155155            $pos += $len;
    156156            if ($size > 0) {
    157157                $tagTablePtr = GetTagTable('Image::ExifTool::ITC::Main');
    158                 my $tagInfo = $exifTool->GetTagInfo($tagTablePtr, 'data');
    159                 my $image = $exifTool->ExtractBinary($pos, $size, $$tagInfo{Name});
    160                 $exifTool->FoundTag($tagInfo, \$image);
     158                my $tagInfo = $et->GetTagInfo($tagTablePtr, 'data');
     159                my $image = $et->ExtractBinary($pos, $size, $$tagInfo{Name});
     160                $et->FoundTag($tagInfo, \$image);
    161161                # skip the rest of the block if necessary
    162162                $raf->Seek($pos+$size, 0) or last
     
    165165            }
    166166        } else {
    167             $exifTool->VPrint(0, "Unknown $tag block ($size bytes)\n");
     167            $et->VPrint(0, "Unknown $tag block ($size bytes)\n");
    168168            $raf->Seek($size-8, 1) or last;
    169169        }
    170170    }
    171     $err and $exifTool->Warn('ITC file format error');
     171    $err and $et->Warn('ITC file format error');
    172172    return $rtnVal;
    173173}
     
    192192=head1 AUTHOR
    193193
    194 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     194Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    195195
    196196This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.