Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex that lists the metadata fields to apply join_before_split to, which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/PICT.pm

    r24107 r34921  
    2121use Image::ExifTool qw(:DataAccess :Utils);
    2222
    23 $VERSION = '1.03';
     23$VERSION = '1.05';
    2424
    2525sub ReadPictValue($$$;$);
     
    274274# PICT image opcodes
    275275%Image::ExifTool::PICT::Main = (
    276     PROCESS_PROC => 0,  # set this to zero to omit tags from lookup
     276    VARS => { NO_LOOKUP => 1 }, # omit tags from lookup
    277277    NOTES => q{
    278278The PICT format contains no true meta information, except for the possible
     
    822822    },
    823823    0x00a1 => [
    824         # this list for documentation only [not currently extracted]
     824        # this list for documentation only [currently not extracted]
    825825        {
    826826            # (not actually a full Photohop IRB record it appears, but it does start
     
    10251025                        my %parms = ( Out => $out );
    10261026                        $parms{MaxLen} = 96 if $verbose < 4;
    1027                         Image::ExifTool::HexDump(\$val, undef, %parms);
     1027                        HexDump(\$val, undef, %parms);
    10281028                    }
    10291029                } else {
     
    10551055                        my %parms = ( Out => $out );
    10561056                        $parms{MaxLen} = 96 if $verbose < 4;
    1057                         Image::ExifTool::HexDump($val, undef, %parms);
     1057                        HexDump($val, undef, %parms);
    10581058                    }
    10591059                }
     
    10821082sub ProcessPICT($$)
    10831083{
    1084     my ($exifTool, $dirInfo) = @_;
     1084    my ($et, $dirInfo) = @_;
    10851085    my $raf = $$dirInfo{RAF};
    1086     $verbose = $exifTool->Options('Verbose');
    1087     $out = $exifTool->Options('TextOut');
     1086    $verbose = $et->Options('Verbose');
     1087    $out = $et->Options('TextOut');
    10881088    $indent = '';
    10891089    my ($buff, $tried, @hdr, $op, $hRes, $vRes);
     
    10961096        $op = pop @hdr;
    10971097        # check for PICT version 1 format
    1098         if ($op eq 0x1101) {
     1098        if ($op == 0x1101) {
    10991099            $vers = 1;
    11001100            undef $extended;
     
    11021102        }
    11031103        # check for PICT version 2 format
    1104         if ($op eq 0x0011) {
     1104        if ($op == 0x0011) {
    11051105            $raf->Read($buff, 28) == 28 or return 0;
    11061106            if ($buff =~ /^\x02\xff\x0c\x00\xff\xff/) {
     
    11391139        $h = int($h * $vRes / 72 + 0.5);
    11401140    }
    1141     $exifTool->SetFileType();
    1142     $exifTool->FoundTag('ImageWidth', $w);
    1143     $exifTool->FoundTag('ImageHeight', $h);
    1144     $exifTool->FoundTag('XResolution', $hRes) if $hRes;
    1145     $exifTool->FoundTag('YResolution', $vRes) if $vRes;
     1141    $et->SetFileType();
     1142    $et->FoundTag('ImageWidth', $w);
     1143    $et->FoundTag('ImageHeight', $h);
     1144    $et->FoundTag('XResolution', $hRes) if $hRes;
     1145    $et->FoundTag('YResolution', $vRes) if $vRes;
    11461146
    11471147    # don't extract image opcodes unless verbose
    1148     return 1 unless $verbose or $exifTool->Options('Unknown');
     1148    return 1 unless $verbose or $et->Options('Unknown');
    11491149
    11501150    $verbose and printf $out "PICT version $vers%s\n", $extended ? ' extended' : '';
     
    11631163            $op = unpack('n', $buff);
    11641164        }
    1165         my $tagInfo = $exifTool->GetTagInfo($tagTablePtr, $op);
     1165        my $tagInfo = $et->GetTagInfo($tagTablePtr, $op);
    11661166        unless ($tagInfo) {
    11671167            my $i;
     
    11701170                next unless $op >= $reserved[$i];
    11711171                last if $op > $reserved[$i+1];
    1172                 $tagInfo = $exifTool->GetTagInfo($tagTablePtr, $reserved[$i]);
     1172                $tagInfo = $et->GetTagInfo($tagTablePtr, $reserved[$i]);
    11731173                last;
    11741174            }
    11751175            last unless $tagInfo;
    11761176        }
    1177         if ($op eq 0xff) {
     1177        if ($op == 0xff) {
    11781178            $verbose and print $out "End of picture\n";
    11791179            $success = 1;
     
    11821182        my $format = $$tagInfo{Format};
    11831183        unless ($format) {
    1184             $exifTool->Warn("Missing format for $$tagInfo{Name}");
     1184            $et->Warn("Missing format for $$tagInfo{Name}");
    11851185            last;
    11861186        }
     
    11911191        my $val = ReadPictValue($raf, $$tagInfo{Name}, $format);
    11921192        unless (defined $val) {
    1193             $exifTool->Warn("Error reading $$tagInfo{Name} information");
     1193            $et->Warn("Error reading $$tagInfo{Name} information");
    11941194            last;
    11951195        }
     
    12011201                $val->{imageDescr}->{compressor} eq 'Photo - JPEG' and
    12021202                ref $val->{imageData} eq 'SCALAR' and
    1203                 $exifTool->ValidateImage($val->{imageData}, 'PreviewImage'))
     1203                $et->ValidateImage($val->{imageData}, 'PreviewImage'))
    12041204            {
    1205                 $exifTool->FoundTag('PreviewImage', $val->{imageData});
     1205                $et->FoundTag('PreviewImage', $val->{imageData});
    12061206            }
    12071207        } else {
    1208             # $exifTool->FoundTag($tagInfo, $val);
     1208            # $et->FoundTag($tagInfo, $val);
    12091209        }
    12101210    }
    1211     $success or $exifTool->Warn('End of picture not found');
     1211    $success or $et->Warn('End of picture not found');
    12121212    return 1;
    12131213}
     
    12371237=head1 AUTHOR
    12381238
    1239 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     1239Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    12401240
    12411241This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.