Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex listing the metadata fields to which join_before_split should be applied. Previously join_before_split was always applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/ZIP.pm

    r24107 r34921  
    1010#               3) http://www.gzip.org/zlib/rfc-gzip.html
    1111#               4) http://DataCompression.info/ArchiveFormats/RAR202.txt
     12#               5) https://jira.atlassian.com/browse/CONF-21706
     13#               6) http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf
    1214#------------------------------------------------------------------------------
    1315
     
    1820use Image::ExifTool qw(:DataAccess :Utils);
    1921
    20 $VERSION = '1.07';
     22$VERSION = '1.26';
    2123
    2224sub WarnProc($) { $warnString = $_[0]; }
     
    2426# file types for recognized Open Document "mimetype" values
    2527my %openDocType = (
     28    'application/vnd.oasis.opendocument.database'     => 'ODB', #5
     29    'application/vnd.oasis.opendocument.chart'        => 'ODC', #5
     30    'application/vnd.oasis.opendocument.formula'      => 'ODF', #5
     31    'application/vnd.oasis.opendocument.graphics'     => 'ODG', #5
     32    'application/vnd.oasis.opendocument.image'        => 'ODI', #5
    2633    'application/vnd.oasis.opendocument.presentation' => 'ODP',
    2734    'application/vnd.oasis.opendocument.spreadsheet'  => 'ODS',
    2835    'application/vnd.oasis.opendocument.text'         => 'ODT',
     36    'application/vnd.adobe.indesign-idml-package'     => 'IDML', #6 (not open doc)
     37    'application/epub+zip' => 'EPUB', #PH (not open doc)
     38);
     39
     40# iWork file types based on names of files found in the zip archive
     41my %iWorkFile = (
     42    'Index/Slide.iwa' => 'KEY',
     43    'Index/Tables/DataList.iwa' => 'NUMBERS',
     44);
     45
     46my %iWorkType = (
     47    NUMBERS => 'NUMBERS',
     48    PAGES   => 'PAGES',
     49    KEY     => 'KEY',
     50    KTH     => 'KTH',
     51    NMBTEMPLATE => 'NMBTEMPLATE',
    2952);
    3053
     
    3760        The following tags are extracted from ZIP archives.  ExifTool also extracts
    3861        additional meta information from compressed documents inside some ZIP-based
    39         files such Office Open XML (DOCX, PPTX and XLSX), Open Document (ODP, ODS
    40         and ODT), iWork (KEY, PAGES, NUMBERS), and Capture One Enhanced Image
    41         Package (EIP).  The ExifTool family 3 groups may be used to organize the
    42         output by embedded document number (ie. the exiftool C<-g3> option).
     62        files such Office Open XML (DOCX, PPTX and XLSX), Open Document (ODB, ODC,
     63        ODF, ODG, ODI, ODP, ODS and ODT), iWork (KEY, PAGES, NUMBERS), Capture One
     64        Enhanced Image Package (EIP), Adobe InDesign Markup Language (IDML),
     65        Electronic Publication (EPUB), and Sketch design files (SKETCH).  The
     66        ExifTool family 3 groups may be used to organize ZIP tags by embedded
     67        document number (ie. the exiftool C<-g3> option).
    4368    },
    4469    2 => 'ZipRequiredVersion',
     
    82107                ($val >> 11) & 0x1f, # hour
    83108                ($val >> 5)  & 0x3f, # minute
    84                  $val        & 0x1f  # second
     109                ($val        & 0x1f) * 2  # second
    85110            );
    86111        },
     
    200225                ($val >> 11) & 0x1f, # hour
    201226                ($val >> 5)  & 0x3f, # minute
    202                  $val        & 0x1f  # second
     227                ($val        & 0x1f) * 2  # second
    203228            );
    204229        },
     
    235260sub ProcessRAR($$)
    236261{
    237     my ($exifTool, $dirInfo) = @_;
     262    my ($et, $dirInfo) = @_;
    238263    my $raf = $$dirInfo{RAF};
    239264    my ($flags, $buff);
     
    241266    return 0 unless $raf->Read($buff, 7) and $buff eq "Rar!\x1a\x07\0";
    242267
    243     $exifTool->SetFileType();
     268    $et->SetFileType();
    244269    SetByteOrder('II');
    245270    my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR');
     
    258283        next unless $size;  # ignore blocks with no data
    259284        # don't try to read very large blocks unless LargeFileSupport is enabled
    260         if ($size > 0x80000000 and not $exifTool->Options('LargeFileSupport')) {
    261             $exifTool->Warn('Large block encountered. Aborting.');
     285        if ($size >= 0x80000000 and not $et->Options('LargeFileSupport')) {
     286            $et->Warn('Large block encountered. Aborting.');
    262287            last;
    263288        }
     
    269294            # add compressed size to start of data so we can extract it with the other tags
    270295            $buff = pack('V',$size) . $buff;
    271             $$exifTool{DOC_NUM} = ++$docNum;
    272             $exifTool->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr);
     296            $$et{DOC_NUM} = ++$docNum;
     297            $et->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr);
    273298            $size -= $n;
    274299        } elsif ($type == 0x75 and $size > 6) { # comment block
     
    276301            # save comment, only if "Stored" (this is untested)
    277302            if (Get8u(\$buff, 3) == 0x30) {
    278                 $exifTool->FoundTag('Comment', substr($buff, 6));
     303                $et->FoundTag('Comment', substr($buff, 6));
    279304            }
    280305            next;
     
    283308        $raf->Seek($size, 1) or last if $size;
    284309    }
    285     $$exifTool{DOC_NUM} = 0;
     310    $$et{DOC_NUM} = 0;
     311    if ($docNum > 1 and not $et->Options('Duplicates')) {
     312        $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
     313    }
    286314
    287315    return 1;
     
    294322sub ProcessGZIP($$)
    295323{
    296     my ($exifTool, $dirInfo) = @_;
     324    my ($et, $dirInfo) = @_;
    297325    my $raf = $$dirInfo{RAF};
    298326    my ($flags, $buff);
     
    300328    return 0 unless $raf->Read($buff, 10) and $buff =~ /^\x1f\x8b\x08/;
    301329
    302     $exifTool->SetFileType();
     330    $et->SetFileType();
    303331    SetByteOrder('II');
    304332
    305333    my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::GZIP');
    306     $exifTool->HandleTag($tagTablePtr, 2, Get8u(\$buff, 2));
    307     $exifTool->HandleTag($tagTablePtr, 3, $flags = Get8u(\$buff, 3));
    308     $exifTool->HandleTag($tagTablePtr, 4, Get32u(\$buff, 4));
    309     $exifTool->HandleTag($tagTablePtr, 8, Get8u(\$buff, 8));
    310     $exifTool->HandleTag($tagTablePtr, 9, Get8u(\$buff, 9));
     334    $et->HandleTag($tagTablePtr, 2, Get8u(\$buff, 2));
     335    $et->HandleTag($tagTablePtr, 3, $flags = Get8u(\$buff, 3));
     336    $et->HandleTag($tagTablePtr, 4, Get32u(\$buff, 4));
     337    $et->HandleTag($tagTablePtr, 8, Get8u(\$buff, 8));
     338    $et->HandleTag($tagTablePtr, 9, Get8u(\$buff, 9));
    311339
    312340    # extract file name and comment if they exist
     
    330358            # it because I could just as easily screw it up)
    331359            my $str = substr($buff, $pos, $end - $pos);
    332             $exifTool->HandleTag($tagTablePtr, $tagID, $str);
     360            $et->HandleTag($tagTablePtr, $tagID, $str);
    333361            last if $end >= length $buff;
    334362            $pos = $end + 1;
     
    343371sub HandleMember($$;$)
    344372{
    345     my ($exifTool, $member, $tagTablePtr) = @_;
     373    my ($et, $member, $tagTablePtr) = @_;
    346374    $tagTablePtr or  $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
    347     $exifTool->HandleTag($tagTablePtr, 2, $member->versionNeededToExtract());
    348     $exifTool->HandleTag($tagTablePtr, 3, $member->bitFlag());
    349     $exifTool->HandleTag($tagTablePtr, 4, $member->compressionMethod());
    350     $exifTool->HandleTag($tagTablePtr, 5, $member->lastModFileDateTime());
    351     $exifTool->HandleTag($tagTablePtr, 7, $member->crc32());
    352     $exifTool->HandleTag($tagTablePtr, 9, $member->compressedSize());
    353     $exifTool->HandleTag($tagTablePtr, 11, $member->uncompressedSize());
    354     $exifTool->HandleTag($tagTablePtr, 15, $member->fileName());
     375    $et->HandleTag($tagTablePtr, 2, $member->versionNeededToExtract());
     376    $et->HandleTag($tagTablePtr, 3, $member->bitFlag());
     377    $et->HandleTag($tagTablePtr, 4, $member->compressionMethod());
     378    $et->HandleTag($tagTablePtr, 5, $member->lastModFileDateTime());
     379    $et->HandleTag($tagTablePtr, 7, $member->crc32());
     380    $et->HandleTag($tagTablePtr, 9, $member->compressedSize());
     381    $et->HandleTag($tagTablePtr, 11, $member->uncompressedSize());
     382    $et->HandleTag($tagTablePtr, 15, $member->fileName());
    355383}
    356384
    357385#------------------------------------------------------------------------------
    358 # Extract information from an ZIP file
     386# Extract information from a ZIP file
    359387# Inputs: 0) ExifTool object reference, 1) dirInfo reference
    360388# Returns: 1 on success, 0 if this wasn't a valid ZIP file
    361389sub ProcessZIP($$)
    362390{
    363     my ($exifTool, $dirInfo) = @_;
     391    my ($et, $dirInfo) = @_;
    364392    my $raf = $$dirInfo{RAF};
    365     my ($buff, $buf2, $zip, $docNum);
    366 
    367     return 0 unless $raf->Read($buff, 30) and $buff =~ /^PK\x03\x04/;
     393    my ($buff, $buf2, $zip);
     394
     395    return 0 unless $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/;
    368396
    369397    my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
    370 
    371     # use Archive::Zip if avilable
     398    my $docNum = 0;
     399
     400    # use Archive::Zip if available
    372401    for (;;) {
    373         unless (eval 'require Archive::Zip' and eval 'require IO::File') {
    374             if ($$exifTool{FILE_EXT} and $$exifTool{FILE_EXT} ne 'ZIP') {
    375                 $exifTool->Warn("Install Archive::Zip to decode compressed ZIP information");
     402        unless (eval { require Archive::Zip } and eval { require IO::File }) {
     403            if ($$et{FILE_EXT} and $$et{FILE_EXT} ne 'ZIP') {
     404                $et->Warn("Install Archive::Zip to decode compressed ZIP information");
    376405            }
    377406            last;
    378407        }
    379408        # Archive::Zip requires a seekable IO::File object
    380         my $fh = $raf->{FILE_PT};
    381         if ($fh and seek($fh, 0, 0)) {
    382             unless (eval 'require IO::File') {
     409        my $fh;
     410        if ($raf->{TESTED} >= 0) {
     411            unless (eval { require IO::File }) {
    383412                # (this shouldn't happen because IO::File is a prerequisite of Archive::Zip)
    384                 $exifTool->Warn("Install IO::File to decode compressed ZIP information");
     413                $et->Warn("Install IO::File to decode compressed ZIP information");
    385414                last;
    386415            }
     416            $raf->Seek(0,0);
     417            $fh = $raf->{FILE_PT};
    387418            bless $fh, 'IO::File';  # Archive::Zip expects an IO::File object
    388         } elsif (eval 'require IO::String') {
     419        } elsif (eval { require IO::String }) {
    389420            # read the whole file into memory (what else can I do?)
    390421            $raf->Slurp();
    391422            $fh = new IO::String ${$raf->{BUFF_PT}};
    392423        } else {
    393             my $type = $fh ? 'pipe or socket' : 'scalar reference';
    394             $exifTool->Warn("Install IO::String to decode compressed ZIP information from a $type");
     424            my $type = $raf->{FILE_PT} ? 'pipe or socket' : 'scalar reference';
     425            $et->Warn("Install IO::String to decode compressed ZIP information from a $type");
    395426            last;
    396427        }
    397         $exifTool->VPrint(1, "  --- using Archive::Zip ---\n");
     428        $et->VPrint(1, "  --- using Archive::Zip ---\n");
    398429        $zip = new Archive::Zip;
    399430        # catch all warnings! (Archive::Zip is bad for this)
    400431        local $SIG{'__WARN__'} = \&WarnProc;
    401432        my $status = $zip->readFromFileHandle($fh);
     433        if ($status eq '4' and $raf->{TESTED} >= 0 and eval { require IO::String } and
     434            $raf->Seek(0,2) and $raf->Tell() < 100000000)
     435        {
     436            # try again, reading it ourself this time in an attempt to avoid
     437            # a failed test with Perl 5.6.2 GNU/Linux 2.6.32-5-686 i686-linux-64int-ld
     438            $raf->Seek(0,0);
     439            $raf->Slurp();
     440            $fh = new IO::String ${$raf->{BUFF_PT}};
     441            $zip = new Archive::Zip;
     442            $status = $zip->readFromFileHandle($fh);
     443        }
    402444        if ($status) {
    403445            undef $zip;
    404446            my %err = ( 1=>'Stream end error', 3=>'Format error', 4=>'IO error' );
    405447            my $err = $err{$status} || "Error $status";
    406             $exifTool->Warn("$err reading ZIP file");
     448            $et->Warn("$err reading ZIP file");
    407449            last;
    408450        }
     
    415457        if ($cType) {
    416458            ($buff, $status) = $zip->contents($cType);
    417             if (not $status and $buff =~ /ContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/) {
     459            if (not $status and (
     460                # first look for the main document with the expected name
     461                $buff =~ m{\sPartName\s*=\s*['"](?:/ppt/presentation.xml|/word/document.xml|/xl/workbook.xml)['"][^>]*\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1} or
     462                # then look for the main part
     463                $buff =~ /<Override[^>]*\sPartName[^<]+\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/ or
     464                # and if all else fails, use the default main
     465                $buff =~ /ContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/))
     466            {
    418467                $mime = $2;
    419468            }
     
    424473            $$dirInfo{MIME} = $mime;
    425474            require Image::ExifTool::OOXML;
    426             Image::ExifTool::OOXML::ProcessDOCX($exifTool, $dirInfo);
     475            Image::ExifTool::OOXML::ProcessDOCX($et, $dirInfo);
    427476            delete $$dirInfo{MIME};
    428477            last;
     
    433482        if (@members) {
    434483            require Image::ExifTool::CaptureOne;
    435             Image::ExifTool::CaptureOne::ProcessEIP($exifTool, $dirInfo);
     484            Image::ExifTool::CaptureOne::ProcessEIP($et, $dirInfo);
    436485            last;
    437486        }
    438487
    439488        # check for an iWork file
    440         @members = $zip->membersMatching('^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg)$');
     489        @members = $zip->membersMatching('(?i)^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+\.(pages|numbers|key)/Index.(zip|xml|apxl))$');
    441490        if (@members) {
    442491            require Image::ExifTool::iWork;
    443             Image::ExifTool::iWork::Process_iWork($exifTool, $dirInfo);
     492            Image::ExifTool::iWork::Process_iWork($et, $dirInfo);
    444493            last;
    445494        }
    446495
    447         # check for an Open Document file
     496        # check for an Open Document, IDML or EPUB file
    448497        my $mType = $zip->memberNamed('mimetype');
    449498        if ($mType) {
    450499            ($mime, $status) = $zip->contents($mType);
    451             unless ($status) {
    452                 chomp $mime;
    453                 if ($openDocType{$mime}) {
    454                     $exifTool->SetFileType($openDocType{$mime}, $mime);
    455                     # extract Open Document metadata from "meta.xml"
    456                     my $meta = $zip->memberNamed('meta.xml');
    457                     if ($meta) {
    458                         ($buff, $status) = $zip->contents($meta);
    459                         unless ($status) {
    460                             my %dirInfo = (
    461                                 DataPt => \$buff,
    462                                 DirLen => length $buff,
    463                                 DataLen => length $buff,
    464                             );
    465                             my $xmpTable = GetTagTable('Image::ExifTool::XMP::Main');
    466                             $exifTool->ProcessDirectory(\%dirInfo, $xmpTable);
    467                         }
     500            if (not $status and $mime =~ /([\x21-\xfe]+)/s) {
     501                # clean up MIME type just in case (note that MIME is case insensitive)
     502                $mime = lc $1;
     503                $et->SetFileType($openDocType{$mime} || 'ZIP', $mime);
     504                $et->Warn("Unrecognized MIMEType $mime") unless $openDocType{$mime};
     505                # extract Open Document metadata from "meta.xml"
     506                my $meta = $zip->memberNamed('meta.xml');
     507                # IDML files have metadata in a different place (ref 6)
     508                $meta or $meta = $zip->memberNamed('META-INF/metadata.xml');
     509                if ($meta) {
     510                    ($buff, $status) = $zip->contents($meta);
     511                    unless ($status) {
     512                        my %dirInfo = (
     513                            DirName => 'XML',
     514                            DataPt  => \$buff,
     515                            DirLen  => length $buff,
     516                            DataLen => length $buff,
     517                        );
     518                        # (avoid structure warnings when copying from XML)
     519                        my $oldWarn = $$et{NO_STRUCT_WARN};
     520                        $$et{NO_STRUCT_WARN} = 1;
     521                        $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::Main'));
     522                        $$et{NO_STRUCT_WARN} = $oldWarn;
    468523                    }
    469                     # extract preview image(s) from "Thumbnails" directory
     524                }
     525                # process rootfile of EPUB container if applicable
     526                for (;;) {
     527                    last if $meta and $mime ne 'application/epub+zip';
     528                    my $container = $zip->memberNamed('META-INF/container.xml');
     529                    ($buff, $status) = $zip->contents($container);
     530                    last if $status;
     531                    $buff =~ /<rootfile\s+[^>]*?\bfull-path=(['"])(.*?)\1/s or last;
     532                    # load the rootfile data (OPF extension; contains XML metadata)
     533                    my $meta2 = $zip->memberNamed($2) or last;
     534                    $meta = $meta2;
     535                    ($buff, $status) = $zip->contents($meta);
     536                    last if $status;
     537                    # use opf:event to generate more meaningful tag names for dc:date
     538                    while ($buff =~ s{<dc:date opf:event="(\w+)">([^<]+)</dc:date>}{<dc:${1}Date>$2</dc:${1}Date>}s) {
     539                        my $dcTable = GetTagTable('Image::ExifTool::XMP::dc');
     540                        my $tag = "${1}Date";
     541                        AddTagToTable($dcTable, $tag, {
     542                            Name => ucfirst $tag,
     543                            Groups => { 2 => 'Time' },
     544                            List => 'Seq',
     545                            %Image::ExifTool::XMP::dateTimeInfo
     546                        }) unless $$dcTable{$tag};
     547                    }
     548                    my %dirInfo = (
     549                        DataPt => \$buff,
     550                        DirLen => length $buff,
     551                        DataLen => length $buff,
     552                        IgnoreProp => { 'package' => 1, metadata => 1 },
     553                    );
     554                    # (avoid structure warnings when copying from XML)
     555                    my $oldWarn = $$et{NO_STRUCT_WARN};
     556                    $$et{NO_STRUCT_WARN} = 1;
     557                    $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::XML'));
     558                    $$et{NO_STRUCT_WARN} = $oldWarn;
     559                    last;
     560                }
     561                if ($openDocType{$mime} or $meta) {
     562                    # extract preview image(s) from "Thumbnails" directory if they exist
    470563                    my $type;
    471564                    my %tag = ( jpg => 'PreviewImage', png => 'PreviewPNG' );
     
    474567                        next unless $thumb;
    475568                        ($buff, $status) = $zip->contents($thumb);
    476                         $exifTool->FoundTag($tag{$type}, $buff) unless $status;
     569                        $et->FoundTag($tag{$type}, $buff) unless $status;
    477570                    }
    478                     last;
     571                    last;   # all done since we recognized the MIME type or found metadata
    479572                }
     573                # continue on to list ZIP contents...
    480574            }
    481575        }
    482576
    483577        # otherwise just extract general ZIP information
    484         $exifTool->SetFileType();
     578        $et->SetFileType();
    485579        @members = $zip->members();
    486         $docNum = 0;
    487         my $member;
     580        my ($member, $iWorkType);
     581        # special files to extract
     582        my %extract = (
     583            'meta.json' => 1,
     584            'previews/preview.png' => 'PreviewPNG',
     585            'preview.jpg' => 'PreviewImage', # (iWork 2013 files)
     586            'preview-web.jpg' => 'OtherImage', # (iWork 2013 files)
     587            'preview-micro.jpg' => 'ThumbnailImage', # (iWork 2013 files)
     588            'QuickLook/Thumbnail.jpg' => 'ThumbnailImage', # (iWork 2009 files)
     589            'QuickLook/Preview.pdf' => 'PreviewPDF', # (iWork 2009 files)
     590        );
    488591        foreach $member (@members) {
    489             $$exifTool{DOC_NUM} = ++$docNum;
    490             HandleMember($exifTool, $member, $tagTablePtr);
    491         }
     592            $$et{DOC_NUM} = ++$docNum;
     593            HandleMember($et, $member, $tagTablePtr);
     594            my $file = $member->fileName();
     595            # extract things from Sketch files
     596            if ($extract{$file}) {
     597                ($buff, $status) = $zip->contents($member);
     598                $status and $et->Warn("Error extracting $file"), next;
     599                if ($file eq 'meta.json') {
     600                    $et->ExtractInfo(\$buff, { ReEntry => 1 });
     601                    if ($$et{VALUE}{App} and $$et{VALUE}{App} =~ /sketch/i) {
     602                        $et->OverrideFileType('SKETCH');
     603                    }
     604                } else {
     605                    $et->FoundTag($extract{$file} => $buff);
     606                }
     607            } elsif ($file eq 'Index/Document.iwa' and not $iWorkType) {
     608                my $type = $iWorkType{$$et{FILE_EXT} || ''};
     609                $iWorkType = $type || 'PAGES';
     610            } elsif ($iWorkFile{$file}) {
     611                $iWorkType = $iWorkFile{$file};
     612            }
     613        }
     614        $et->OverrideFileType($iWorkType) if $iWorkType;
    492615        last;
    493616    }
     
    495618    if ($zip) {
    496619        delete $$dirInfo{ZIP};
    497         delete $$exifTool{DOC_NUM};
     620        delete $$et{DOC_NUM};
     621        if ($docNum > 1 and not $et->Options('Duplicates')) {
     622            $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
     623        }
    498624        return 1;
    499625    }
     
    501627# process the ZIP file by hand (funny, but this seems easier than using Archive::Zip)
    502628#
    503     $docNum = 0;
    504     $exifTool->VPrint(1, "  -- processing as binary data --\n");
     629    $et->VPrint(1, "  -- processing as binary data --\n");
    505630    $raf->Seek(30, 0);
    506     $exifTool->SetFileType();
     631    $et->SetFileType();
    507632    SetByteOrder('II');
    508633
     
    523648        $raf->Read($buf2, $len) == $len or last;
    524649
    525         $$exifTool{DOC_NUM} = ++$docNum;
     650        $$et{DOC_NUM} = ++$docNum;
    526651        $buff .= $buf2;
    527652        my %dirInfo = (
     
    532657            DirLen => 30 + $len,
    533658        );
    534         $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);
     659        $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
    535660        my $flags = Get16u(\$buff, 6);
    536661        if ($flags & 0x08) {
     
    539664            #  sizes are set to 0 in the header.  Instead, they are stored
    540665            #  after the compressed data with an optional header of 0x08074b50)
    541             $exifTool->Warn('Stream mode data encountered, file list may be incomplete');
     666            $et->Warn('Stream mode data encountered, file list may be incomplete');
    542667            last;
    543668        }
     
    546671        $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/ or last;
    547672    }
    548     delete $$exifTool{DOC_NUM};
     673    delete $$et{DOC_NUM};
     674    if ($docNum > 1 and not $et->Options('Duplicates')) {
     675        $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
     676    }
    549677    return 1;
    550678}
     
    566694This module contains definitions required by Image::ExifTool to extract meta
    567695information from ZIP, GZIP and RAR archives.  This includes ZIP-based file
    568 types like DOCX, PPTX, XLSX, ODP, ODS, ODT and EIP.
     696types like Office Open XML (DOCX, PPTX and XLSX), Open Document (ODB, ODC,
     697ODF, ODG, ODI, ODP, ODS and ODT), iWork (KEY, PAGES, NUMBERS), Capture One
     698Enhanced Image Package (EIP), Adobe InDesign Markup Language (IDML),
     699Electronic Publication (EPUB), and Sketch design files (SKETCH).
    569700
    570701=head1 AUTHOR
    571702
    572 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     703Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    573704
    574705This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.