Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex listing the metadata fields that join_before_split should be applied to. Previously join_before_split was always applied to all metadata fields. Now it is applied to any *Keywords metafields by default, as that is the metafield we have experience of that behaves differently to the others, in that it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/BigTIFF.pm

    r24107 r34921  
    1616use Image::ExifTool::Exif;
    1717
    18 $VERSION = '1.05';
     18$VERSION = '1.07';
    1919
    2020my $maxOffset = 0x7fffffff; # currently supported maximum data offset/size
     
    2626sub ProcessBigIFD($$$)
    2727{
    28     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
     28    my ($et, $dirInfo, $tagTablePtr) = @_;
    2929    my $raf = $$dirInfo{RAF};
    30     my $verbose = $exifTool->{OPTIONS}->{Verbose};
    31     my $htmlDump = $exifTool->{HTML_DUMP};
     30    my $verbose = $$et{OPTIONS}{Verbose};
     31    my $htmlDump = $$et{HTML_DUMP};
    3232    my $dirName = $$dirInfo{DirName};
    3333    my $dirStart = $$dirInfo{DirStart};
    34 
    35     $verbose = -1 if $htmlDump; # mix htmlDump into verbose so we can test for both at once
     34    my ($offName, $nextOffName);
     35
     36    if ($htmlDump) {
     37        $verbose = -1;  # mix htmlDump into verbose so we can test for both at once
     38        $offName = $$dirInfo{OffsetName};
     39    }
    3640
    3741    # loop through IFD chain
    3842    for (;;) {
    39         if ($dirStart > $maxOffset and not $exifTool->Options('LargeFileSupport')) {
    40             $exifTool->Warn('Huge offsets not supported (LargeFileSupport not set)');
     43        if ($dirStart > $maxOffset and not $et->Options('LargeFileSupport')) {
     44            $et->Warn('Huge offsets not supported (LargeFileSupport not set)');
    4145            last;
    4246        }
    4347        unless ($raf->Seek($dirStart, 0)) {
    44             $exifTool->Warn("Bad $dirName offset");
     48            $et->Warn("Bad $dirName offset");
    4549            return 0;
    4650        }
    4751        my ($dirBuff, $index);
    4852        unless ($raf->Read($dirBuff, 8) == 8) {
    49             $exifTool->Warn("Truncated $dirName count");
     53            $et->Warn("Truncated $dirName count");
    5054            return 0;
    5155        }
    5256        my $numEntries = Image::ExifTool::Get64u(\$dirBuff, 0);
    53         $verbose > 0 and $exifTool->VerboseDir($dirName, $numEntries);
     57        $verbose > 0 and $et->VerboseDir($dirName, $numEntries);
    5458        my $bsize = $numEntries * 20;
    5559        if ($bsize > $maxOffset) {
    56             $exifTool->Warn('Huge directory counts not yet supported');
     60            $et->Warn('Huge directory counts not yet supported');
    5761            last;
    5862        }
    5963        my $bufPos = $raf->Tell();
    6064        unless ($raf->Read($dirBuff, $bsize) == $bsize) {
    61             $exifTool->Warn("Truncated $dirName directory");
     65            $et->Warn("Truncated $dirName directory");
    6266            return 0;
    6367        }
     
    6569        $raf->Read($nextIFD, 8) == 8 or undef $nextIFD; # try to read next IFD pointer
    6670        if ($htmlDump) {
    67             $exifTool->HDump($bufPos-8, 8, "$dirName entries", "Entry count: $numEntries");
     71            $et->HDump($bufPos-8, 8, "$dirName entries", "Entry count: $numEntries", undef, $offName);
    6872            if (defined $nextIFD) {
    69                 my $tip = sprintf("Offset: 0x%.8x", Image::ExifTool::Get64u(\$nextIFD, 0));
    70                 $exifTool->HDump($bufPos + 20 * $numEntries, 8, "Next IFD", $tip, 0);
     73                my $off = Image::ExifTool::Get64u(\$nextIFD, 0);
     74                my $tip = sprintf("Offset: 0x%.8x", $off);
     75                my $id = $offName;
     76                ($nextOffName, $id) = Image::ExifTool::Exif::NextOffsetName($et, $id) if $off;
     77                $et->HDump($bufPos + 20 * $numEntries, 8, "Next IFD", $tip, 0, $id);
    7178            }
    7279        }
     
    7986            my $formatSize = $Image::ExifTool::Exif::formatSize[$format];
    8087            unless (defined $formatSize) {
    81                 $exifTool->HDump($bufPos+$entry,20,"[invalid IFD entry]",
    82                          "Bad format value: $format", 1);
     88                $et->HDump($bufPos+$entry,20,"[invalid IFD entry]",
     89                           "Bad format value: $format", 1, $offName);
    8390                # warn unless the IFD was just padded with zeros
    84                 $exifTool->Warn(sprintf("Unknown format ($format) for $dirName tag 0x%x",$tagID));
     91                $et->Warn(sprintf("Unknown format ($format) for $dirName tag 0x%x",$tagID));
    8592                return 0; # assume corrupted IFD
    8693            }
    8794            my $formatStr = $Image::ExifTool::Exif::formatName[$format];
    8895            my $size = $count * $formatSize;
    89             my $tagInfo = $exifTool->GetTagInfo($tagTablePtr, $tagID);
     96            my $tagInfo = $et->GetTagInfo($tagTablePtr, $tagID);
    9097            next unless defined $tagInfo or $verbose;
    9198            my $valuePtr = $entry + 12;
    92             my ($valBuff, $valBase);
     99            my ($valBuff, $valBase, $rational, $subOffName);
    93100            if ($size > 8) {
    94101                if ($size > $maxOffset) {
    95                     $exifTool->Warn("Can't handle $dirName entry $index (huge size)");
     102                    $et->Warn("Can't handle $dirName entry $index (huge size)");
    96103                    next;
    97104                }
    98105                $valuePtr = Image::ExifTool::Get64u(\$dirBuff, $valuePtr);
    99                 if ($valuePtr > $maxOffset and not $exifTool->Options('LargeFileSupport')) {
    100                     $exifTool->Warn("Can't handle $dirName entry $index (LargeFileSupport not set)");
     106                if ($valuePtr > $maxOffset and not $et->Options('LargeFileSupport')) {
     107                    $et->Warn("Can't handle $dirName entry $index (LargeFileSupport not set)");
    101108                    next;
    102109                }
    103110                unless ($raf->Seek($valuePtr, 0) and $raf->Read($valBuff, $size) == $size) {
    104                     $exifTool->Warn("Error reading $dirName entry $index");
     111                    $et->Warn("Error reading $dirName entry $index");
    105112                    next;
    106113                }
     
    112119            if (defined $tagInfo and not $tagInfo) {
    113120                # GetTagInfo() required the value for a Condition
    114                 $tagInfo = $exifTool->GetTagInfo($tagTablePtr, $tagID, \$valBuff);
    115             }
    116             my $val = ReadValue(\$valBuff, 0, $formatStr, $count, $size);
     121                $tagInfo = $et->GetTagInfo($tagTablePtr, $tagID, \$valBuff);
     122            }
     123            my $val = ReadValue(\$valBuff, 0, $formatStr, $count, $size, \$rational);
    117124            if ($htmlDump) {
    118125                my $tval = $val;
    119                 if ($formatStr =~ /^rational64([su])$/) {
    120                     # show numerator/denominator separately
    121                     my $f = ReadValue(\$valBuff, 0, "int32$1", $count*2, $size);
    122                     $f =~ s/(-?\d+) (-?\d+)/$1\/$2/g;
    123                     $tval .= " ($f)";
    124                 }
     126                # show numerator/denominator separately for rational numbers
     127                $tval .= " ($rational)" if defined $rational;
    125128                my ($tagName, $colName);
    126129                if ($tagID == 0x927c and $dirName eq 'ExifIFD') {
     
    153156                }
    154157                $tip .= "Value: $tval";
    155                 $exifTool->HDump($entry+$bufPos, 20, "$dname $colName", $tip, 1);
     158                my ($id, $sid);
     159                if ($tagInfo and $$tagInfo{SubIFD}) {
     160                    ($subOffName, $id, $sid) = Image::ExifTool::Exif::NextOffsetName($et, $offName);
     161                } else {
     162                    $id = $offName;
     163                }
     164                $et->HDump($entry+$bufPos, 20, "$dname $colName", $tip, 1, $id);
    156165                if ($size > 8) {
    157166                    # add value data block
    158167                    my $flg = ($tagInfo and $$tagInfo{SubDirectory} and $$tagInfo{MakerNotes}) ? 4 : 0;
    159                     $exifTool->HDump($valuePtr,$size,"$tagName value",'SAME', $flg);
     168                    $et->HDump($valuePtr,$size,"$tagName value",'SAME', $flg, $sid);
    160169                }
    161170            }
    162171            if ($tagInfo and $$tagInfo{SubIFD}) {
    163172                # process all SubIFD's as BigTIFF
    164                 $verbose > 0 and $exifTool->VerboseInfo($tagID, $tagInfo,
     173                $verbose > 0 and $et->VerboseInfo($tagID, $tagInfo,
    165174                    Table   => $tagTablePtr,
    166175                    Index   => $index,
     
    179188                    $subdirName .= $i if $i;
    180189                    my %subdirInfo = (
    181                         RAF      => $raf,
    182                         DataPos  => 0,
    183                         DirStart => $offsets[$i],
    184                         DirName  => $subdirName,
    185                         Parent   => $dirInfo,
     190                        RAF        => $raf,
     191                        DataPos    => 0,
     192                        DirStart   => $offsets[$i],
     193                        DirName    => $subdirName,
     194                        Parent     => $dirName,
     195                        OffsetName => $subOffName,
    186196                    );
    187                     $exifTool->ProcessDirectory(\%subdirInfo, $tagTablePtr, \&ProcessBigIFD);
     197                    $et->ProcessDirectory(\%subdirInfo, $tagTablePtr, \&ProcessBigIFD);
    188198                }
    189199            } else {
    190                 my $tagKey = $exifTool->HandleTag($tagTablePtr, $tagID, $val,
     200                my $tagKey = $et->HandleTag($tagTablePtr, $tagID, $val,
    191201                    Index   => $index,
    192202                    DataPt  => \$valBuff,
     
    198208                    RAF     => $raf,
    199209                );
    200                 $tagKey and $exifTool->SetGroup($tagKey, $dirName);
     210                $tagKey and $et->SetGroup($tagKey, $dirName);
    201211            }
    202212        }
    203213        last unless $dirName =~ /^(IFD|SubIFD)(\d*)$/;
    204214        $dirName = $1 . (($2 || 0) + 1);
    205         defined $nextIFD or $exifTool->Warn("Bad $dirName pointer"), return 0;
     215        defined $nextIFD or $et->Warn("Bad $dirName pointer"), return 0;
    206216        $dirStart = Image::ExifTool::Get64u(\$nextIFD, 0);
    207217        $dirStart or last;
     218        $offName = $nextOffName;
    208219    }
    209220    return 1;
     
    216227sub ProcessBTF($$)
    217228{
    218     my ($exifTool, $dirInfo) = @_;
     229    my ($et, $dirInfo) = @_;
    219230    my $raf = $$dirInfo{RAF};
    220231    my $buff;
     
    223234    return 0 unless $buff =~ /^(MM\0\x2b\0\x08\0\0|II\x2b\0\x08\0\0\0)/;
    224235    if ($$dirInfo{OutFile}) {
    225         $exifTool->Error('ExifTool does not support writing of BigTIFF images');
     236        $et->Error('ExifTool does not support writing of BigTIFF images');
    226237        return 1;
    227238    }
    228     $exifTool->SetFileType('BTF'); # set the FileType tag
     239    $et->SetFileType('BTF'); # set the FileType tag
    229240    SetByteOrder(substr($buff, 0, 2));
    230241    my $offset = Image::ExifTool::Get64u(\$buff, 8);
    231     if ($exifTool->{HTML_DUMP}) {
     242    if ($$et{HTML_DUMP}) {
    232243        my $o = (GetByteOrder() eq 'II') ? 'Little' : 'Big';
    233         $exifTool->HDump(0, 8, "BigTIFF header", "Byte order: $o endian", 0);
    234         $exifTool->HDump(8, 8, "IFD0 pointer", sprintf("Offset: 0x%.8x",$offset), 0);
     244        $et->HDump(0, 8, "BigTIFF header", "Byte order: $o endian", 0);
     245        $et->HDump(8, 8, "IFD0 pointer", sprintf("Offset: 0x%.8x",$offset), 0);
    235246    }
    236247    my %dirInfo = (
     
    242253    );
    243254    my $tagTablePtr = GetTagTable('Image::ExifTool::Exif::Main');
    244     $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr, \&ProcessBigIFD);
     255    $et->ProcessDirectory(\%dirInfo, $tagTablePtr, \&ProcessBigIFD);
    245256    return 1;
    246257}
     
    265276=head1 AUTHOR
    266277
    267 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     278Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    268279
    269280This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.