Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex that lists the metadata fields to which join_before_split is applied. Previously, join_before_split was always applied to all metadata fields. Now it is applied by default only to metafields matching .*Keywords, as that is the one metafield we have experience of that behaves differently from the others, in that it stores values by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/InDesign.pm

    r24107 r34921  
    1515use Image::ExifTool qw(:DataAccess :Utils);
    1616
    17 $VERSION = '1.02';
     17$VERSION = '1.06';
    1818
    1919# map for writing metadata to InDesign files (currently only write XMP)
     
    3333sub ProcessIND($$)
    3434{
    35     my ($exifTool, $dirInfo) = @_;
     35    my ($et, $dirInfo) = @_;
    3636    my $raf = $$dirInfo{RAF};
    3737    my $outfile = $$dirInfo{OutFile};
     
    4242    return 0 unless $hdr eq $masterPageGUID;
    4343    return 0 unless $raf->Read($buff, 8) == 8;
    44     $exifTool->SetFileType($buff eq 'DOCUMENT' ? 'INDD' : 'IND');   # set the FileType tag
     44    $et->SetFileType($buff eq 'DOCUMENT' ? 'INDD' : 'IND');   # set the FileType tag
    4545
    4646    # read the master pages
     
    7474    $pages < 2 and $err = 'Invalid page count', goto DONE;
    7575    my $pos = $pages * 4096;
    76     if ($pos > 0x7fffffff and not $exifTool->Options('LargeFileSupport')) {
     76    if ($pos > 0x7fffffff and not $et->Options('LargeFileSupport')) {
    7777        $err = 'InDesign files larger than 2 GB not supported (LargeFileSupport not set)';
    7878        goto DONE;
     
    8080    if ($outfile) {
    8181        # make XMP the preferred group for writing
    82         $exifTool->InitWriteDirs(\%indMap, 'XMP');
     82        $et->InitWriteDirs(\%indMap, 'XMP');
    8383
    8484        Write($outfile, $buff, $buf2) or $err = 1, goto DONE;
     
    9393    }
    9494    # scan through the contiguous objects for XMP
    95     my $verbose = $exifTool->Options('Verbose');
    96     my $out = $exifTool->Options('TextOut');
     95    my $verbose = $et->Options('Verbose');
     96    my $out = $et->Options('TextOut');
    9797    for (;;) {
    9898        $raf->Read($hdr, 32) or last;
     
    106106            printf $out "Contiguous object at offset 0x%x (%d bytes):\n", $raf->Tell(), $len;
    107107            if ($verbose > 2) {
    108                 my %parms = (Addr => $raf->Tell());
    109                 $parms{MaxLen} = $verbose > 3 ? 1024 : 96 if $verbose < 5;
    110                 $raf->Seek(-$raf->Read($buff, $len), 1) or $err = 1;
    111                 Image::ExifTool::HexDump(\$buff, undef, %parms);
     108                my $len2 = $len < 1024000 ? $len : 1024000;
     109                $raf->Seek(-$raf->Read($buff, $len2), 1) or $err = 1;
     110                $et->VerboseDump(\$buff, Addr => $raf->Tell());
    112111            }
    113112        }
     
    119118                my $lenWord = $1;   # save length word for writing later
    120119                $len -= 4;          # get length of XMP only
     120                $foundXMP = 1;
     121                # I have a sample where the XMP is 107 MB, and ActivePerl may run into
     122                # memory troubles (with its apparent 1 GB limit) if the XMP is larger
     123                # than about 400 MB, so guard against this
     124                if ($len > 300 * 1024 * 1024) {
     125                    my $msg = sprintf('Insanely large XMP (%.0f MB)', $len / (1024 * 1024));
     126                    if ($outfile) {
     127                        $et->Error($msg, 2) and $err = 1, last;
     128                    } elsif ($et->Options('IgnoreMinorErrors')) {
     129                        $et->Warn($msg);
     130                    } else {
     131                        $et->Warn("$msg. Ignored.", 1);
     132                        $err = 1;
     133                        last;
     134                    }
     135                }
    121136                # load and parse the XMP data
    122137                unless ($raf->Seek(-52, 1) and $raf->Read($buff, $len) == $len) {
     
    124139                    last;
    125140                }
    126                 $foundXMP = 1;
    127141                my %dirInfo = (
    128142                    DataPt  => \$buff,
     
    130144                    NoDelete => 1, # do not allow this to be deleted when writing
    131145                );
     146                # validate xmp data length (should be same as length in header - 4)
     147                my $xmpLen = unpack($streamInt32u, $lenWord);
     148                unless ($xmpLen == $len) {
     149                    if ($xmpLen < $len) {
     150                        $dirInfo{DirLen} = $xmpLen;
     151                    } else {
     152                        $err = 'Truncated XMP stream (missing ' . ($xmpLen - $len) . ' bytes)';
     153                    }
     154                }
    132155                my $tagTablePtr = GetTagTable('Image::ExifTool::XMP::Main');
    133156                if ($outfile) {
    134                     # validate xmp data length (should be same as length in header - 4)
    135                     my $xmpLen = unpack($streamInt32u, $lenWord);
    136                     unless ($xmpLen == $len) {
    137                         $err = "Incorrect XMP stream length ($xmpLen should be $len)";
    138                         last;
    139                     }
     157                    last if $err;
    140158                    # make sure that XMP is writable
    141159                    my $classID = Get32u(\$hdr, 20);
    142160                    $classID & 0x40000000 or $err = 'XMP stream is not writable', last;
    143                     my $xmp = $exifTool->WriteDirectory(\%dirInfo, $tagTablePtr);
     161                    my $xmp = $et->WriteDirectory(\%dirInfo, $tagTablePtr);
    144162                    if ($xmp and length $xmp) {
    145163                        # write new xmp with leading length word
     
    149167                        Set32u(0xffffffff, \$hdr, 28);
    150168                    } else {
    151                         $$exifTool{CHANGED} = 0;    # didn't change anything
    152                         $exifTool->Warn("Can't delete XMP as a block from InDesign file") if defined $xmp;
     169                        $$et{CHANGED} = 0;    # didn't change anything
     170                        $et->Warn("Can't delete XMP as a block from InDesign file") if defined $xmp;
    153171                        # put length word back at start of stream
    154172                        $buff = $lenWord . $buff;
    155173                    }
    156174                } else {
    157                     $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);
     175                    $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
    158176                }
    159177                $len = 0;   # we got the full stream (nothing left to read)
     
    198216DONE:
    199217    if (not $err) {
    200         $exifTool->Warn('No XMP stream to edit') if $outfile and not $foundXMP;
     218        $et->Warn('No XMP stream to edit') if $outfile and not $foundXMP;
    201219        return 1;       # success!
    202220    } elsif (not $outfile) {
    203221        # issue warning on read error
    204         $exifTool->Warn($err) unless $err eq '1';
     222        $et->Warn($err) unless $err eq '1';
    205223    } elsif ($err ne '1') {
    206224        # set error and return success code
    207         $exifTool->Error($err);
     225        $et->Error($err);
    208226    } else {
    209227        return -1;      # write error
     
    236254InDesign files which previously contained XMP.
    237255
    238 3) File sizes of greater than 2 GB and are not currently supported because
    239 the ability to handle large files like this is system dependent.
     2563) File sizes of greater than 2 GB are supported only if the system supports
     257them and the LargeFileSupport option is enabled.
    240258
    241259=head1 AUTHOR
    242260
    243 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     261Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    244262
    245263This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.